mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-30 06:41:51 +00:00
Merge pull request #27248 from NousResearch/hermes/hermes-27dc9cc2
refactor(run_agent): extract AIAgent internals into agent/ modules (16k→3.8k lines, 76% reduction)
This commit is contained in:
commit
56ad30de17
19 changed files with 14148 additions and 12694 deletions
1469
agent/agent_init.py
Normal file
1469
agent/agent_init.py
Normal file
File diff suppressed because it is too large
Load diff
2134
agent/agent_runtime_helpers.py
Normal file
2134
agent/agent_runtime_helpers.py
Normal file
File diff suppressed because it is too large
Load diff
570
agent/background_review.py
Normal file
570
agent/background_review.py
Normal file
|
|
@ -0,0 +1,570 @@
|
|||
"""Background memory/skill review — fork the agent to evaluate the turn.
|
||||
|
||||
After every turn, ``AIAgent.run_conversation`` may call
|
||||
:func:`spawn_background_review` to fire off a daemon thread that replays
|
||||
the conversation snapshot in a forked :class:`AIAgent` and asks itself
|
||||
"should any skill/memory be saved or updated?". Writes go straight to
|
||||
the memory + skill stores. Main conversation and prompt cache are never
|
||||
touched.
|
||||
|
||||
The fork inherits the parent's live runtime (provider, model, base_url,
|
||||
credentials, cached system prompt) so it hits the same prefix cache and
|
||||
uses the same auth. It runs with a tool whitelist limited to memory and
|
||||
skill management tools; everything else is denied at runtime.
|
||||
|
||||
See the ``hermes-agent-dev`` skill (``references/self-improvement-loop.md``)
|
||||
for invariants and PR review criteria.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Review-prompt strings — used by ``spawn_background_review_thread`` to build
|
||||
# the user-message that the forked review agent receives. AIAgent exposes
|
||||
# them as class attributes (``_MEMORY_REVIEW_PROMPT`` etc.) for back-compat;
|
||||
# the actual text lives here so future edits are one-place.
|
||||
_MEMORY_REVIEW_PROMPT = (
|
||||
"Review the conversation above and consider saving to memory if appropriate.\n\n"
|
||||
"Focus on:\n"
|
||||
"1. Has the user revealed things about themselves — their persona, desires, "
|
||||
"preferences, or personal details worth remembering?\n"
|
||||
"2. Has the user expressed expectations about how you should behave, their work "
|
||||
"style, or ways they want you to operate?\n\n"
|
||||
"If something stands out, save it using the memory tool. "
|
||||
"If nothing is worth saving, just say 'Nothing to save.' and stop."
|
||||
)
|
||||
|
||||
_SKILL_REVIEW_PROMPT = (
|
||||
"Review the conversation above and update the skill library. Be "
|
||||
"ACTIVE — most sessions produce at least one skill update, even if "
|
||||
"small. A pass that does nothing is a missed learning opportunity, "
|
||||
"not a neutral outcome.\n\n"
|
||||
"Target shape of the library: CLASS-LEVEL skills, each with a rich "
|
||||
"SKILL.md and a `references/` directory for session-specific detail. "
|
||||
"Not a long flat list of narrow one-session-one-skill entries. This "
|
||||
"shapes HOW you update, not WHETHER you update.\n\n"
|
||||
"Signals to look for (any one of these warrants action):\n"
|
||||
" • User corrected your style, tone, format, legibility, or "
|
||||
"verbosity. Frustration signals like 'stop doing X', 'this is too "
|
||||
"verbose', 'don't format like this', 'why are you explaining', "
|
||||
"'just give me the answer', 'you always do Y and I hate it', or an "
|
||||
"explicit 'remember this' are FIRST-CLASS skill signals, not just "
|
||||
"memory signals. Update the relevant skill(s) to embed the "
|
||||
"preference so the next session starts already knowing.\n"
|
||||
" • User corrected your workflow, approach, or sequence of steps. "
|
||||
"Encode the correction as a pitfall or explicit step in the skill "
|
||||
"that governs that class of task.\n"
|
||||
" • Non-trivial technique, fix, workaround, debugging path, or "
|
||||
"tool-usage pattern emerged that a future session would benefit "
|
||||
"from. Capture it.\n"
|
||||
" • A skill that got loaded or consulted this session turned out "
|
||||
"to be wrong, missing a step, or outdated. Patch it NOW.\n\n"
|
||||
"Preference order — prefer the earliest action that fits, but do "
|
||||
"pick one when a signal above fired:\n"
|
||||
" 1. UPDATE A CURRENTLY-LOADED SKILL. Look back through the "
|
||||
"conversation for skills the user loaded via /skill-name or you "
|
||||
"read via skill_view. If any of them covers the territory of the "
|
||||
"new learning, PATCH that one first. It is the skill that was in "
|
||||
"play, so it's the right one to extend.\n"
|
||||
" 2. UPDATE AN EXISTING UMBRELLA (via skills_list + skill_view). "
|
||||
"If no loaded skill fits but an existing class-level skill does, "
|
||||
"patch it. Add a subsection, a pitfall, or broaden a trigger.\n"
|
||||
" 3. ADD A SUPPORT FILE under an existing umbrella. Skills can be "
|
||||
"packaged with three kinds of support files — use the right "
|
||||
"directory per kind:\n"
|
||||
" • `references/<topic>.md` — session-specific detail (error "
|
||||
"transcripts, reproduction recipes, provider quirks) AND "
|
||||
"condensed knowledge banks: quoted research, API docs, external "
|
||||
"authoritative excerpts, or domain notes you found while working "
|
||||
"on the problem. Write it concise and for the value of the task, "
|
||||
"not as a full mirror of upstream docs.\n"
|
||||
" • `templates/<name>.<ext>` — starter files meant to be "
|
||||
"copied and modified (boilerplate configs, scaffolding, a "
|
||||
"known-good example the agent can `reproduce with modifications`).\n"
|
||||
" • `scripts/<name>.<ext>` — statically re-runnable actions "
|
||||
"the skill can invoke directly (verification scripts, fixture "
|
||||
"generators, deterministic probes, anything the agent should run "
|
||||
"rather than hand-type each time).\n"
|
||||
" Add support files via skill_manage action=write_file with "
|
||||
"file_path starting 'references/', 'templates/', or 'scripts/'. "
|
||||
"The umbrella's SKILL.md should gain a one-line pointer to any "
|
||||
"new support file so future agents know it exists.\n"
|
||||
" 4. CREATE A NEW CLASS-LEVEL UMBRELLA SKILL when no existing "
|
||||
"skill covers the class. The name MUST be at the class level. "
|
||||
"The name MUST NOT be a specific PR number, error string, feature "
|
||||
"codename, library-alone name, or 'fix-X / debug-Y / audit-Z-today' "
|
||||
"session artifact. If the proposed name only makes sense for "
|
||||
"today's task, it's wrong — fall back to (1), (2), or (3).\n\n"
|
||||
"User-preference embedding (important): when the user expressed a "
|
||||
"style/format/workflow preference, the update belongs in the "
|
||||
"SKILL.md body, not just in memory. Memory captures 'who the user "
|
||||
"is and what the current situation and state of your operations "
|
||||
"are'; skills capture 'how to do this class of task for this "
|
||||
"user'. When they complain about how you handled a task, the "
|
||||
"skill that governs that task needs to carry the lesson.\n\n"
|
||||
"If you notice two existing skills that overlap, note it in your "
|
||||
"reply — the background curator handles consolidation at scale.\n\n"
|
||||
"Do NOT capture (these become persistent self-imposed constraints "
|
||||
"that bite you later when the environment changes):\n"
|
||||
" • Environment-dependent failures: missing binaries, fresh-install "
|
||||
"errors, post-migration path mismatches, 'command not found', "
|
||||
"unconfigured credentials, uninstalled packages. The user can fix "
|
||||
"these — they are not durable rules.\n"
|
||||
" • Negative claims about tools or features ('browser tools do not "
|
||||
"work', 'X tool is broken', 'cannot use Y from execute_code'). These "
|
||||
"harden into refusals the agent cites against itself for months "
|
||||
"after the actual problem was fixed.\n"
|
||||
" • Session-specific transient errors that resolved before the "
|
||||
"conversation ended. If retrying worked, the lesson is the retry "
|
||||
"pattern, not the original failure.\n"
|
||||
" • One-off task narratives. A user asking 'summarize today's "
|
||||
"market' or 'analyze this PR' is not a class of work that warrants "
|
||||
"a skill.\n\n"
|
||||
"If a tool failed because of setup state, capture the FIX (install "
|
||||
"command, config step, env var to set) under an existing setup or "
|
||||
"troubleshooting skill — never 'this tool does not work' as a "
|
||||
"standalone constraint.\n\n"
|
||||
"'Nothing to save.' is a real option but should NOT be the "
|
||||
"default. If the session ran smoothly with no corrections and "
|
||||
"produced no new technique, just say 'Nothing to save.' and stop. "
|
||||
"Otherwise, act."
|
||||
)
|
||||
|
||||
_COMBINED_REVIEW_PROMPT = (
|
||||
"Review the conversation above and update two things:\n\n"
|
||||
"**Memory**: who the user is. Did the user reveal persona, "
|
||||
"desires, preferences, personal details, or expectations about "
|
||||
"how you should behave? Save facts about the user and durable "
|
||||
"preferences with the memory tool.\n\n"
|
||||
"**Skills**: how to do this class of task. Be ACTIVE — most "
|
||||
"sessions produce at least one skill update. A pass that does "
|
||||
"nothing is a missed learning opportunity, not a neutral outcome.\n\n"
|
||||
"Target shape of the skill library: CLASS-LEVEL skills with a rich "
|
||||
"SKILL.md and a `references/` directory for session-specific detail. "
|
||||
"Not a long flat list of narrow one-session-one-skill entries.\n\n"
|
||||
"Signals that warrant a skill update (any one is enough):\n"
|
||||
" • User corrected your style, tone, format, legibility, "
|
||||
"verbosity, or approach. Frustration is a FIRST-CLASS skill "
|
||||
"signal, not just a memory signal. 'stop doing X', 'don't format "
|
||||
"like this', 'I hate when you Y' — embed the lesson in the skill "
|
||||
"that governs that task so the next session starts fixed.\n"
|
||||
" • Non-trivial technique, fix, workaround, or debugging path "
|
||||
"emerged.\n"
|
||||
" • A skill that was loaded or consulted turned out wrong, "
|
||||
"missing, or outdated — patch it now.\n\n"
|
||||
"Preference order for skills — pick the earliest that fits:\n"
|
||||
" 1. UPDATE A CURRENTLY-LOADED SKILL. Check what skills were "
|
||||
"loaded via /skill-name or skill_view in the conversation. If one "
|
||||
"of them covers the learning, PATCH it first. It was in play; "
|
||||
"it's the right place.\n"
|
||||
" 2. UPDATE AN EXISTING UMBRELLA (skills_list + skill_view to "
|
||||
"find the right one). Patch it.\n"
|
||||
" 3. ADD A SUPPORT FILE under an existing umbrella via "
|
||||
"skill_manage action=write_file. Three kinds: "
|
||||
"`references/<topic>.md` for session-specific detail OR condensed "
|
||||
"knowledge banks (quoted research, API docs excerpts, domain "
|
||||
"notes) written concise and task-focused; `templates/<name>.<ext>` "
|
||||
"for starter files meant to be copied and modified; "
|
||||
"`scripts/<name>.<ext>` for statically re-runnable actions "
|
||||
"(verification, fixture generators, probes). Add a one-line "
|
||||
"pointer in SKILL.md so future agents find them.\n"
|
||||
" 4. CREATE A NEW CLASS-LEVEL UMBRELLA when nothing exists. "
|
||||
"Name at the class level — NOT a PR number, error string, "
|
||||
"codename, library-alone name, or 'fix-X / debug-Y' session "
|
||||
"artifact. If the name only fits today's task, fall back to (1), "
|
||||
"(2), or (3).\n\n"
|
||||
"User-preference embedding: when the user complains about how "
|
||||
"you handled a task, update the skill that governs that task — "
|
||||
"memory alone isn't enough. Memory says 'who the user is and "
|
||||
"what the current situation and state of your operations are'; "
|
||||
"skills say 'how to do this class of task for this user'. Both "
|
||||
"should carry user-preference lessons when relevant.\n\n"
|
||||
"If you notice overlapping existing skills, mention it — the "
|
||||
"background curator handles consolidation.\n\n"
|
||||
"Do NOT capture as skills (these become persistent self-imposed "
|
||||
"constraints that bite you later when the environment changes):\n"
|
||||
" • Environment-dependent failures: missing binaries, fresh-install "
|
||||
"errors, post-migration path mismatches, 'command not found', "
|
||||
"unconfigured credentials, uninstalled packages. The user can fix "
|
||||
"these — they are not durable rules.\n"
|
||||
" • Negative claims about tools or features ('browser tools do not "
|
||||
"work', 'X tool is broken', 'cannot use Y from execute_code'). These "
|
||||
"harden into refusals the agent cites against itself for months "
|
||||
"after the actual problem was fixed.\n"
|
||||
" • Session-specific transient errors that resolved before the "
|
||||
"conversation ended. If retrying worked, the lesson is the retry "
|
||||
"pattern, not the original failure.\n"
|
||||
" • One-off task narratives. A user asking 'summarize today's "
|
||||
"market' or 'analyze this PR' is not a class of work that warrants "
|
||||
"a skill.\n\n"
|
||||
"If a tool failed because of setup state, capture the FIX (install "
|
||||
"command, config step, env var to set) under an existing setup or "
|
||||
"troubleshooting skill — never 'this tool does not work' as a "
|
||||
"standalone constraint.\n\n"
|
||||
"Act on whichever of the two dimensions has real signal. If "
|
||||
"genuinely nothing stands out on either, say 'Nothing to save.' "
|
||||
"and stop — but don't reach for that conclusion as a default."
|
||||
)
|
||||
|
||||
|
||||
|
||||
def summarize_background_review_actions(
|
||||
review_messages: List[Dict],
|
||||
prior_snapshot: List[Dict],
|
||||
) -> List[str]:
|
||||
"""Build the human-facing action summary for a background review pass.
|
||||
|
||||
Walks the review agent's session messages and collects "successful tool
|
||||
action" descriptions to surface to the user (e.g. "Memory updated").
|
||||
Tool messages already present in ``prior_snapshot`` are skipped so we
|
||||
don't re-surface stale results from the prior conversation that the
|
||||
review agent inherited via ``conversation_history`` (issue #14944).
|
||||
|
||||
Matching is by ``tool_call_id`` when available, with a content-equality
|
||||
fallback for tool messages that lack one.
|
||||
"""
|
||||
existing_tool_call_ids = set()
|
||||
existing_tool_contents = set()
|
||||
for prior in prior_snapshot or []:
|
||||
if not isinstance(prior, dict) or prior.get("role") != "tool":
|
||||
continue
|
||||
tcid = prior.get("tool_call_id")
|
||||
if tcid:
|
||||
existing_tool_call_ids.add(tcid)
|
||||
else:
|
||||
content = prior.get("content")
|
||||
if isinstance(content, str):
|
||||
existing_tool_contents.add(content)
|
||||
|
||||
actions: List[str] = []
|
||||
for msg in review_messages or []:
|
||||
if not isinstance(msg, dict) or msg.get("role") != "tool":
|
||||
continue
|
||||
tcid = msg.get("tool_call_id")
|
||||
if tcid and tcid in existing_tool_call_ids:
|
||||
continue
|
||||
if not tcid:
|
||||
content_str = msg.get("content")
|
||||
if isinstance(content_str, str) and content_str in existing_tool_contents:
|
||||
continue
|
||||
try:
|
||||
data = json.loads(msg.get("content", "{}"))
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
continue
|
||||
if not isinstance(data, dict) or not data.get("success"):
|
||||
continue
|
||||
message = data.get("message", "")
|
||||
target = data.get("target", "")
|
||||
if "created" in message.lower():
|
||||
actions.append(message)
|
||||
elif "updated" in message.lower():
|
||||
actions.append(message)
|
||||
elif "added" in message.lower() or (target and "add" in message.lower()):
|
||||
label = "Memory" if target == "memory" else "User profile" if target == "user" else target
|
||||
actions.append(f"{label} updated")
|
||||
elif "Entry added" in message:
|
||||
label = "Memory" if target == "memory" else "User profile" if target == "user" else target
|
||||
actions.append(f"{label} updated")
|
||||
elif "removed" in message.lower() or "replaced" in message.lower():
|
||||
label = "Memory" if target == "memory" else "User profile" if target == "user" else target
|
||||
actions.append(f"{label} updated")
|
||||
return actions
|
||||
|
||||
|
||||
def build_memory_write_metadata(
|
||||
agent: Any,
|
||||
*,
|
||||
write_origin: Optional[str] = None,
|
||||
execution_context: Optional[str] = None,
|
||||
task_id: Optional[str] = None,
|
||||
tool_call_id: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build provenance metadata for external memory-provider mirrors."""
|
||||
metadata: Dict[str, Any] = {
|
||||
"write_origin": write_origin or getattr(agent, "_memory_write_origin", "assistant_tool"),
|
||||
"execution_context": (
|
||||
execution_context
|
||||
or getattr(agent, "_memory_write_context", "foreground")
|
||||
),
|
||||
"session_id": agent.session_id or "",
|
||||
"parent_session_id": agent._parent_session_id or "",
|
||||
"platform": agent.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli"),
|
||||
"tool_name": "memory",
|
||||
}
|
||||
if task_id:
|
||||
metadata["task_id"] = task_id
|
||||
if tool_call_id:
|
||||
metadata["tool_call_id"] = tool_call_id
|
||||
return {k: v for k, v in metadata.items() if v not in {None, ""}}
|
||||
|
||||
|
||||
def _run_review_in_thread(
|
||||
agent: Any,
|
||||
messages_snapshot: List[Dict],
|
||||
prompt: str,
|
||||
) -> None:
|
||||
"""Worker function executed in the background-review daemon thread.
|
||||
|
||||
Spawns a forked ``AIAgent`` inheriting the parent's runtime, runs the
|
||||
review prompt, and surfaces a compact action summary back to the user
|
||||
via ``agent._safe_print`` and ``agent.background_review_callback``.
|
||||
"""
|
||||
# Local import to avoid a hard circular dep at module load.
|
||||
from run_agent import AIAgent
|
||||
from tools.terminal_tool import set_approval_callback as _set_approval_callback
|
||||
|
||||
# Install a non-interactive approval callback on this worker
|
||||
# thread so any dangerous-command guard the review agent trips
|
||||
# resolves to "deny" instead of falling back to input() -- which
|
||||
# deadlocks against the parent's prompt_toolkit TUI (#15216).
|
||||
# Same pattern as _subagent_auto_deny in tools/delegate_tool.py.
|
||||
def _bg_review_auto_deny(command, description, **kwargs):
|
||||
logger.warning(
|
||||
"Background review auto-denied dangerous command: %s (%s)",
|
||||
command, description,
|
||||
)
|
||||
return "deny"
|
||||
try:
|
||||
_set_approval_callback(_bg_review_auto_deny)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
review_agent = None
|
||||
review_messages: List[Dict] = []
|
||||
try:
|
||||
with open(os.devnull, "w", encoding="utf-8") as _devnull, \
|
||||
contextlib.redirect_stdout(_devnull), \
|
||||
contextlib.redirect_stderr(_devnull):
|
||||
# Inherit the parent agent's live runtime (provider, model,
|
||||
# base_url, api_key, api_mode) so the fork uses the exact
|
||||
# same credentials the main turn is using. Without this,
|
||||
# AIAgent.__init__ re-runs auto-resolution from env vars,
|
||||
# which fails for OAuth-only providers, session-scoped
|
||||
# creds, or credential-pool setups where the resolver can't
|
||||
# reconstruct auth from scratch -- producing the spurious
|
||||
# "No LLM provider configured" warning at end of turn.
|
||||
_parent_runtime = agent._current_main_runtime()
|
||||
_parent_api_mode = _parent_runtime.get("api_mode") or None
|
||||
# The review fork needs to call agent-loop tools (memory,
|
||||
# skill_manage). Those tools require Hermes' own dispatch,
|
||||
# which the codex_app_server runtime bypasses entirely
|
||||
# (it runs the turn inside codex's subprocess). So when
|
||||
# the parent is on codex_app_server, downgrade the review
|
||||
# fork to codex_responses — same auth/credentials, but
|
||||
# talks to the OpenAI Responses API directly so Hermes
|
||||
# owns the loop and the agent-loop tools dispatch.
|
||||
if _parent_api_mode == "codex_app_server":
|
||||
_parent_api_mode = "codex_responses"
|
||||
# skip_memory=True keeps the review fork from
|
||||
# touching external memory plugins (honcho, mem0,
|
||||
# supermemory, etc.). Without it, the fork's
|
||||
# __init__ rebuilds its own _memory_manager from
|
||||
# config, scoped to the parent's session_id, and
|
||||
# run_conversation() then leaks the harness prompt
|
||||
# into the user's real memory namespace via three
|
||||
# ingestion sites: on_turn_start (cadence + turn
|
||||
# message), prefetch_all (recall query), and
|
||||
# sync_all (harness prompt + review output recorded
|
||||
# as a (user, assistant) turn pair). Built-in
|
||||
# MEMORY.md / USER.md state is re-bound from the
|
||||
# parent below so memory(action="add") writes from
|
||||
# the review still land on disk; the review just
|
||||
# has zero side effects on external providers.
|
||||
review_agent = AIAgent(
|
||||
model=agent.model,
|
||||
max_iterations=16,
|
||||
quiet_mode=True,
|
||||
platform=agent.platform,
|
||||
provider=agent.provider,
|
||||
api_mode=_parent_api_mode,
|
||||
base_url=_parent_runtime.get("base_url") or None,
|
||||
api_key=_parent_runtime.get("api_key") or None,
|
||||
credential_pool=getattr(agent, "_credential_pool", None),
|
||||
parent_session_id=agent.session_id,
|
||||
skip_memory=True,
|
||||
)
|
||||
review_agent._memory_write_origin = "background_review"
|
||||
review_agent._memory_write_context = "background_review"
|
||||
review_agent._memory_store = agent._memory_store
|
||||
review_agent._memory_enabled = agent._memory_enabled
|
||||
review_agent._user_profile_enabled = agent._user_profile_enabled
|
||||
review_agent._memory_nudge_interval = 0
|
||||
review_agent._skill_nudge_interval = 0
|
||||
# Suppress all status/warning emits from the fork so the
|
||||
# user only sees the final successful-action summary.
|
||||
# Without this, mid-review "Iteration budget exhausted",
|
||||
# rate-limit retries, compression warnings, and other
|
||||
# lifecycle messages bubble up through _emit_status ->
|
||||
# _vprint and leak past the stdout redirect (they go via
|
||||
# _print_fn/status_callback, which bypass sys.stdout).
|
||||
review_agent.suppress_status_output = True
|
||||
# Inherit the parent's cached system prompt verbatim so
|
||||
# the review fork's outbound HTTP request hits the same
|
||||
# Anthropic/OpenRouter prefix cache the parent warmed.
|
||||
# Without this, the fork rebuilds the system prompt from
|
||||
# scratch (fresh _hermes_now() timestamp, fresh
|
||||
# session_id, narrower toolset → different skills_prompt)
|
||||
# and the byte-exact prefix-cache key misses. See
|
||||
# issue #25322 and PR #17276 for the full analysis +
|
||||
# measured impact (~26% end-to-end cost reduction on
|
||||
# Sonnet 4.5).
|
||||
review_agent._cached_system_prompt = agent._cached_system_prompt
|
||||
# Defensive: pin session_start + session_id to the
|
||||
# parent's so any code path that re-renders parts of
|
||||
# the system prompt (compression, plugin hooks) still
|
||||
# produces byte-identical output. The cached-prompt
|
||||
# assignment above already short-circuits the normal
|
||||
# rebuild path, but these pins guarantee parity even
|
||||
# if a future code path bypasses the cache.
|
||||
review_agent.session_start = agent.session_start
|
||||
review_agent.session_id = agent.session_id
|
||||
|
||||
from model_tools import get_tool_definitions
|
||||
from hermes_cli.plugins import (
|
||||
set_thread_tool_whitelist,
|
||||
clear_thread_tool_whitelist,
|
||||
)
|
||||
|
||||
review_whitelist = {
|
||||
t["function"]["name"]
|
||||
for t in get_tool_definitions(
|
||||
enabled_toolsets=["memory", "skills"],
|
||||
quiet_mode=True,
|
||||
)
|
||||
}
|
||||
set_thread_tool_whitelist(
|
||||
review_whitelist,
|
||||
deny_msg_fmt=(
|
||||
"Background review denied non-whitelisted tool: "
|
||||
"{tool_name}. Only memory/skill tools are allowed."
|
||||
),
|
||||
)
|
||||
try:
|
||||
review_agent.run_conversation(
|
||||
user_message=(
|
||||
prompt
|
||||
+ "\n\nYou can only call memory and skill "
|
||||
"management tools. Other tools will be denied "
|
||||
"at runtime — do not attempt them."
|
||||
),
|
||||
conversation_history=messages_snapshot,
|
||||
)
|
||||
finally:
|
||||
clear_thread_tool_whitelist()
|
||||
|
||||
# Tear down memory providers while stdout is still
|
||||
# redirected so background thread teardown (Honcho flush,
|
||||
# Hindsight sync, etc.) stays silent. The finally block
|
||||
# below is a safety net for the exception path.
|
||||
try:
|
||||
review_agent.shutdown_memory_provider()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
review_agent.close()
|
||||
except Exception:
|
||||
pass
|
||||
review_messages = list(getattr(review_agent, "_session_messages", []))
|
||||
review_agent = None
|
||||
|
||||
# Scan the review agent's messages for successful tool actions
|
||||
# and surface a compact summary to the user. Tool messages
|
||||
# already present in messages_snapshot must be skipped, since
|
||||
# the review agent inherits that history and would otherwise
|
||||
# re-surface stale "created"/"updated" messages from the prior
|
||||
# conversation as if they just happened (issue #14944).
|
||||
actions = summarize_background_review_actions(
|
||||
review_messages,
|
||||
messages_snapshot,
|
||||
)
|
||||
|
||||
if actions:
|
||||
summary = " · ".join(dict.fromkeys(actions))
|
||||
agent._safe_print(
|
||||
f" 💾 Self-improvement review: {summary}"
|
||||
)
|
||||
_bg_cb = agent.background_review_callback
|
||||
if _bg_cb:
|
||||
try:
|
||||
_bg_cb(
|
||||
f"💾 Self-improvement review: {summary}"
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
logger.warning("Background memory/skill review failed: %s", e)
|
||||
agent._emit_auxiliary_failure("background review", e)
|
||||
finally:
|
||||
# Safety-net cleanup for the exception path. Normal
|
||||
# completion already shut down inside redirect_stdout above.
|
||||
# Re-open devnull here so any teardown output (Honcho flush,
|
||||
# Hindsight sync, background thread joins) stays silent even
|
||||
# on the exception path where redirect_stdout already exited.
|
||||
if review_agent is not None:
|
||||
try:
|
||||
with open(os.devnull, "w", encoding="utf-8") as _fn, \
|
||||
contextlib.redirect_stdout(_fn), \
|
||||
contextlib.redirect_stderr(_fn):
|
||||
try:
|
||||
review_agent.shutdown_memory_provider()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
review_agent.close()
|
||||
except Exception:
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
# Clear the approval callback on this bg-review thread so a
|
||||
# recycled thread-id doesn't inherit a stale reference.
|
||||
try:
|
||||
_set_approval_callback(None)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def spawn_background_review_thread(
|
||||
agent: Any,
|
||||
messages_snapshot: List[Dict],
|
||||
review_memory: bool = False,
|
||||
review_skills: bool = False,
|
||||
):
|
||||
"""Build the review thread target and prompt for a background review.
|
||||
|
||||
Returns a ``(target, prompt)`` tuple. The caller (``AIAgent._spawn_background_review``)
|
||||
owns the actual ``threading.Thread`` construction so test-level patches
|
||||
of ``run_agent.threading.Thread`` keep working.
|
||||
"""
|
||||
# Pick the right prompt based on which triggers fired. Allow per-agent
|
||||
# override (the prompts moved to module-level constants but old code paths
|
||||
# that set agent._MEMORY_REVIEW_PROMPT etc. directly keep working).
|
||||
if review_memory and review_skills:
|
||||
prompt = getattr(agent, "_COMBINED_REVIEW_PROMPT", _COMBINED_REVIEW_PROMPT)
|
||||
elif review_memory:
|
||||
prompt = getattr(agent, "_MEMORY_REVIEW_PROMPT", _MEMORY_REVIEW_PROMPT)
|
||||
else:
|
||||
prompt = getattr(agent, "_SKILL_REVIEW_PROMPT", _SKILL_REVIEW_PROMPT)
|
||||
|
||||
def _target() -> None:
|
||||
_run_review_in_thread(agent, messages_snapshot, prompt)
|
||||
|
||||
return _target, prompt
|
||||
|
||||
|
||||
__all__ = [
|
||||
"_MEMORY_REVIEW_PROMPT",
|
||||
"_SKILL_REVIEW_PROMPT",
|
||||
"_COMBINED_REVIEW_PROMPT",
|
||||
"spawn_background_review_thread",
|
||||
"summarize_background_review_actions",
|
||||
"build_memory_write_metadata",
|
||||
]
|
||||
2043
agent/chat_completion_helpers.py
Normal file
2043
agent/chat_completion_helpers.py
Normal file
File diff suppressed because it is too large
Load diff
448
agent/codex_runtime.py
Normal file
448
agent/codex_runtime.py
Normal file
|
|
@ -0,0 +1,448 @@
|
|||
"""Codex API runtime — App Server and Responses-API streaming paths.
|
||||
|
||||
Extracted from :class:`AIAgent` to keep the agent loop file focused.
|
||||
Each function takes the parent ``AIAgent`` as its first argument
|
||||
(``agent``). AIAgent keeps thin forwarder methods for backward
|
||||
compatibility.
|
||||
|
||||
* ``run_codex_app_server_turn`` — drives one turn through the
|
||||
``codex_app_server`` subprocess client (used when a Codex CLI install
|
||||
is the active provider).
|
||||
* ``run_codex_stream`` — streams a Codex Responses API call (the
|
||||
``codex_responses`` api_mode).
|
||||
* ``run_codex_create_stream_fallback`` — recovery path when the
|
||||
Responses ``stream=True`` initial create fails.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, Dict, List
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def run_codex_app_server_turn(
|
||||
agent,
|
||||
*,
|
||||
user_message: str,
|
||||
original_user_message: Any,
|
||||
messages: List[Dict[str, Any]],
|
||||
effective_task_id: str,
|
||||
should_review_memory: bool = False,
|
||||
) -> Dict[str, Any]:
|
||||
"""Codex app-server runtime path. Hands the entire turn to a `codex
|
||||
app-server` subprocess and projects its events back into Hermes'
|
||||
messages list so memory/skill review keep working.
|
||||
|
||||
Called from run_conversation() when agent.api_mode == "codex_app_server".
|
||||
Returns the same dict shape as the chat_completions path.
|
||||
"""
|
||||
from agent.transports.codex_app_server_session import CodexAppServerSession
|
||||
|
||||
# Lazy session: one CodexAppServerSession per AIAgent instance.
|
||||
# Spawned on first turn, reused across turns, closed at AIAgent
|
||||
# shutdown (see _cleanup hook).
|
||||
if not hasattr(agent, "_codex_session") or agent._codex_session is None:
|
||||
cwd = getattr(agent, "session_cwd", None) or os.getcwd()
|
||||
# Approval callback: defer to Hermes' standard prompt flow if a
|
||||
# CLI thread has installed one. Gateway / cron contexts get the
|
||||
# codex-side fail-closed default.
|
||||
try:
|
||||
from tools.terminal_tool import _get_approval_callback
|
||||
approval_callback = _get_approval_callback()
|
||||
except Exception:
|
||||
approval_callback = None
|
||||
agent._codex_session = CodexAppServerSession(
|
||||
cwd=cwd,
|
||||
approval_callback=approval_callback,
|
||||
)
|
||||
|
||||
# NOTE: the user message is ALREADY appended to messages by the
|
||||
# standard run_conversation() flow (line ~11823) before the early
|
||||
# return reaches us. Do NOT append again — that would duplicate.
|
||||
|
||||
try:
|
||||
turn = agent._codex_session.run_turn(user_input=user_message)
|
||||
except Exception as exc:
|
||||
logger.exception("codex app-server turn failed")
|
||||
# Crash → unconditionally drop the session so the next turn
|
||||
# respawns from scratch instead of reusing a dead client.
|
||||
try:
|
||||
agent._codex_session.close()
|
||||
except Exception:
|
||||
pass
|
||||
agent._codex_session = None
|
||||
return {
|
||||
"final_response": (
|
||||
f"Codex app-server turn failed: {exc}. "
|
||||
f"Fall back to default runtime with `/codex-runtime auto`."
|
||||
),
|
||||
"messages": messages,
|
||||
"api_calls": 0,
|
||||
"completed": False,
|
||||
"partial": True,
|
||||
"error": str(exc),
|
||||
}
|
||||
|
||||
# If the turn signalled the underlying client is wedged (deadline
|
||||
# blown, post-tool watchdog tripped, OAuth refresh died, subprocess
|
||||
# exited), retire the session so the next turn respawns codex
|
||||
# rather than riding the broken process. Mirrors openclaw beta.8's
|
||||
# "retire timed-out app-server clients" fix.
|
||||
if getattr(turn, "should_retire", False):
|
||||
logger.warning(
|
||||
"codex app-server session retired (turn error: %s)",
|
||||
turn.error,
|
||||
)
|
||||
try:
|
||||
agent._codex_session.close()
|
||||
except Exception:
|
||||
pass
|
||||
agent._codex_session = None
|
||||
|
||||
# Splice projected messages into the conversation. The projector emits
|
||||
# standard {role, content, tool_calls, tool_call_id} entries, which
|
||||
# is exactly what curator.py / sessions DB expect.
|
||||
if turn.projected_messages:
|
||||
messages.extend(turn.projected_messages)
|
||||
|
||||
# Counter ticks for the agent-improvement loop.
|
||||
# _turns_since_memory and _user_turn_count are ALREADY incremented
|
||||
# in the run_conversation() pre-loop block (lines ~11793-11817) so we
|
||||
# do NOT touch them here — that would double-count.
|
||||
# Only _iters_since_skill needs explicit increment, since the
|
||||
# chat_completions loop bumps it per tool iteration (line ~12110)
|
||||
# and that loop is bypassed on this path.
|
||||
agent._iters_since_skill = (
|
||||
getattr(agent, "_iters_since_skill", 0) + turn.tool_iterations
|
||||
)
|
||||
|
||||
# Now check the skill nudge AFTER iters were incremented — same
|
||||
# pattern the chat_completions path uses (line ~15432).
|
||||
should_review_skills = False
|
||||
if (
|
||||
agent._skill_nudge_interval > 0
|
||||
and agent._iters_since_skill >= agent._skill_nudge_interval
|
||||
and "skill_manage" in agent.valid_tool_names
|
||||
):
|
||||
should_review_skills = True
|
||||
agent._iters_since_skill = 0
|
||||
|
||||
# External memory provider sync (mirrors line ~15439). Skipped on
|
||||
# interrupt/error to avoid feeding partial transcripts to memory.
|
||||
if not turn.interrupted and turn.error is None:
|
||||
try:
|
||||
agent._sync_external_memory_for_turn(
|
||||
original_user_message=original_user_message,
|
||||
final_response=turn.final_text,
|
||||
interrupted=False,
|
||||
)
|
||||
except Exception:
|
||||
logger.debug("external memory sync raised", exc_info=True)
|
||||
|
||||
# Background review fork — same cadence + signature as the default
|
||||
# path (line ~15449). Only fires when a trigger actually tripped AND
|
||||
# we have a real final response.
|
||||
if (
|
||||
turn.final_text
|
||||
and not turn.interrupted
|
||||
and (should_review_memory or should_review_skills)
|
||||
):
|
||||
try:
|
||||
agent._spawn_background_review(
|
||||
messages_snapshot=list(messages),
|
||||
review_memory=should_review_memory,
|
||||
review_skills=should_review_skills,
|
||||
)
|
||||
except Exception:
|
||||
logger.debug("background review spawn raised", exc_info=True)
|
||||
|
||||
return {
|
||||
"final_response": turn.final_text,
|
||||
"messages": messages,
|
||||
"api_calls": 1, # one app-server "turn" maps to one logical API call
|
||||
"completed": not turn.interrupted and turn.error is None,
|
||||
"partial": turn.interrupted or turn.error is not None,
|
||||
"error": turn.error,
|
||||
"codex_thread_id": turn.thread_id,
|
||||
"codex_turn_id": turn.turn_id,
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
def run_codex_stream(agent, api_kwargs: dict, client: Any = None, on_first_delta: callable = None):
|
||||
"""Execute one streaming Responses API request and return the final response."""
|
||||
import httpx as _httpx
|
||||
|
||||
active_client = client or agent._ensure_primary_openai_client(reason="codex_stream_direct")
|
||||
max_stream_retries = 1
|
||||
has_tool_calls = False
|
||||
first_delta_fired = False
|
||||
# Accumulate streamed text so we can recover if get_final_response()
|
||||
# returns empty output (e.g. chatgpt.com backend-api sends
|
||||
# response.incomplete instead of response.completed).
|
||||
agent._codex_streamed_text_parts: list = []
|
||||
for attempt in range(max_stream_retries + 1):
|
||||
if agent._interrupt_requested:
|
||||
raise InterruptedError("Agent interrupted before Codex stream retry")
|
||||
collected_output_items: list = []
|
||||
try:
|
||||
with active_client.responses.stream(**api_kwargs) as stream:
|
||||
for event in stream:
|
||||
agent._touch_activity("receiving stream response")
|
||||
if agent._interrupt_requested:
|
||||
break
|
||||
event_type = getattr(event, "type", "")
|
||||
# Fire callbacks on text content deltas (suppress during tool calls)
|
||||
if "output_text.delta" in event_type or event_type == "response.output_text.delta":
|
||||
delta_text = getattr(event, "delta", "")
|
||||
if delta_text:
|
||||
agent._codex_streamed_text_parts.append(delta_text)
|
||||
if delta_text and not has_tool_calls:
|
||||
if not first_delta_fired:
|
||||
first_delta_fired = True
|
||||
if on_first_delta:
|
||||
try:
|
||||
on_first_delta()
|
||||
except Exception:
|
||||
pass
|
||||
agent._fire_stream_delta(delta_text)
|
||||
# Track tool calls to suppress text streaming
|
||||
elif "function_call" in event_type:
|
||||
has_tool_calls = True
|
||||
# Fire reasoning callbacks
|
||||
elif "reasoning" in event_type and "delta" in event_type:
|
||||
reasoning_text = getattr(event, "delta", "")
|
||||
if reasoning_text:
|
||||
agent._fire_reasoning_delta(reasoning_text)
|
||||
# Collect completed output items — some backends
|
||||
# (chatgpt.com/backend-api/codex) stream valid items
|
||||
# via response.output_item.done but the SDK's
|
||||
# get_final_response() returns an empty output list.
|
||||
elif event_type == "response.output_item.done":
|
||||
done_item = getattr(event, "item", None)
|
||||
if done_item is not None:
|
||||
collected_output_items.append(done_item)
|
||||
# Log non-completed terminal events for diagnostics
|
||||
elif event_type in {"response.incomplete", "response.failed"}:
|
||||
resp_obj = getattr(event, "response", None)
|
||||
status = getattr(resp_obj, "status", None) if resp_obj else None
|
||||
incomplete_details = getattr(resp_obj, "incomplete_details", None) if resp_obj else None
|
||||
logger.warning(
|
||||
"Codex Responses stream received terminal event %s "
|
||||
"(status=%s, incomplete_details=%s, streamed_chars=%d). %s",
|
||||
event_type, status, incomplete_details,
|
||||
sum(len(p) for p in agent._codex_streamed_text_parts),
|
||||
agent._client_log_context(),
|
||||
)
|
||||
final_response = stream.get_final_response()
|
||||
# PATCH: ChatGPT Codex backend streams valid output items
|
||||
# but get_final_response() can return an empty output list.
|
||||
# Backfill from collected items or synthesize from deltas.
|
||||
_out = getattr(final_response, "output", None)
|
||||
if isinstance(_out, list) and not _out:
|
||||
if collected_output_items:
|
||||
final_response.output = list(collected_output_items)
|
||||
logger.debug(
|
||||
"Codex stream: backfilled %d output items from stream events",
|
||||
len(collected_output_items),
|
||||
)
|
||||
elif agent._codex_streamed_text_parts and not has_tool_calls:
|
||||
assembled = "".join(agent._codex_streamed_text_parts)
|
||||
final_response.output = [SimpleNamespace(
|
||||
type="message",
|
||||
role="assistant",
|
||||
status="completed",
|
||||
content=[SimpleNamespace(type="output_text", text=assembled)],
|
||||
)]
|
||||
logger.debug(
|
||||
"Codex stream: synthesized output from %d text deltas (%d chars)",
|
||||
len(agent._codex_streamed_text_parts), len(assembled),
|
||||
)
|
||||
return final_response
|
||||
except (_httpx.RemoteProtocolError, _httpx.ReadTimeout, _httpx.ConnectError, ConnectionError) as exc:
|
||||
if attempt < max_stream_retries:
|
||||
logger.debug(
|
||||
"Codex Responses stream transport failed (attempt %s/%s); retrying. %s error=%s",
|
||||
attempt + 1,
|
||||
max_stream_retries + 1,
|
||||
agent._client_log_context(),
|
||||
exc,
|
||||
)
|
||||
continue
|
||||
logger.debug(
|
||||
"Codex Responses stream transport failed; falling back to create(stream=True). %s error=%s",
|
||||
agent._client_log_context(),
|
||||
exc,
|
||||
)
|
||||
return agent._run_codex_create_stream_fallback(api_kwargs, client=active_client)
|
||||
except RuntimeError as exc:
|
||||
err_text = str(exc)
|
||||
missing_completed = "response.completed" in err_text
|
||||
# The OpenAI SDK's Responses streaming state machine raises
|
||||
# ``RuntimeError("Expected to have received `response.created`
|
||||
# before `<event-type>`")`` when the first SSE event from the
|
||||
# server is anything other than ``response.created`` — and it
|
||||
# discards the event's payload before we can read it. Three
|
||||
# real-world backends emit a different first frame:
|
||||
#
|
||||
# * xAI on grok-4.x OAuth — sends ``error`` (issues
|
||||
# reported around the May 2026 SuperGrok rollout when
|
||||
# multi-turn conversations replay encrypted reasoning
|
||||
# content the OAuth tier rejects)
|
||||
# * codex-lb relays — send ``codex.rate_limits`` (#14634)
|
||||
# * custom Responses relays — send ``response.in_progress``
|
||||
# (#8133)
|
||||
#
|
||||
# In all three cases the underlying byte stream is still
|
||||
# readable: a non-stream ``responses.create(stream=True)``
|
||||
# fallback succeeds and surfaces the real provider error as
|
||||
# a normal exception with body+status_code attached, which
|
||||
# ``_summarize_api_error`` can then translate into a useful
|
||||
# user-facing line. Treat ``response.created`` prelude
|
||||
# errors the same way we already treat ``response.completed``
|
||||
# postlude errors.
|
||||
prelude_error = (
|
||||
"Expected to have received `response.created`" in err_text
|
||||
or "Expected to have received \"response.created\"" in err_text
|
||||
)
|
||||
if (missing_completed or prelude_error) and attempt < max_stream_retries:
|
||||
logger.debug(
|
||||
"Responses stream %s (attempt %s/%s); retrying. %s",
|
||||
"prelude rejected" if prelude_error else "closed before completion",
|
||||
attempt + 1,
|
||||
max_stream_retries + 1,
|
||||
agent._client_log_context(),
|
||||
)
|
||||
continue
|
||||
if missing_completed or prelude_error:
|
||||
logger.debug(
|
||||
"Responses stream %s; falling back to create(stream=True). %s err=%s",
|
||||
"rejected before response.created" if prelude_error else "did not emit response.completed",
|
||||
agent._client_log_context(),
|
||||
err_text,
|
||||
)
|
||||
return agent._run_codex_create_stream_fallback(api_kwargs, client=active_client)
|
||||
raise
|
||||
|
||||
|
||||
|
||||
def run_codex_create_stream_fallback(agent, api_kwargs: dict, client: Any = None):
|
||||
"""Fallback path for stream completion edge cases on Codex-style Responses backends."""
|
||||
active_client = client or agent._ensure_primary_openai_client(reason="codex_create_stream_fallback")
|
||||
fallback_kwargs = dict(api_kwargs)
|
||||
fallback_kwargs["stream"] = True
|
||||
fallback_kwargs = agent._get_transport().preflight_kwargs(fallback_kwargs, allow_stream=True)
|
||||
stream_or_response = active_client.responses.create(**fallback_kwargs)
|
||||
|
||||
# Compatibility shim for mocks or providers that still return a concrete response.
|
||||
if hasattr(stream_or_response, "output"):
|
||||
return stream_or_response
|
||||
if not hasattr(stream_or_response, "__iter__"):
|
||||
return stream_or_response
|
||||
|
||||
terminal_response = None
|
||||
collected_output_items: list = []
|
||||
collected_text_deltas: list = []
|
||||
try:
|
||||
for event in stream_or_response:
|
||||
agent._touch_activity("receiving stream response")
|
||||
event_type = getattr(event, "type", None)
|
||||
if not event_type and isinstance(event, dict):
|
||||
event_type = event.get("type")
|
||||
|
||||
# ``error`` SSE frames carry the provider's real failure
|
||||
# reason (subscription / quota / model-not-available /
|
||||
# rejected-reasoning-replay) but never appear in the
|
||||
# ``{completed, incomplete, failed}`` terminal set, so the
|
||||
# raw loop below would silently consume them and end with
|
||||
# "did not emit a terminal response". xAI in particular
|
||||
# emits ``type=error`` as the FIRST frame for OAuth
|
||||
# accounts whose Grok subscription is missing/exhausted —
|
||||
# the SDK's stream helper raises ``RuntimeError(Expected
|
||||
# to have received response.created before error)`` which
|
||||
# the caller catches and routes here, expecting this
|
||||
# fallback to surface the message. Synthesize an
|
||||
# APIError-shaped exception so ``_summarize_api_error``
|
||||
# and the credential-pool entitlement detector see the
|
||||
# real text instead of a generic RuntimeError.
|
||||
if event_type == "error":
|
||||
err_message = getattr(event, "message", None)
|
||||
if not err_message and isinstance(event, dict):
|
||||
err_message = event.get("message")
|
||||
err_code = getattr(event, "code", None)
|
||||
if not err_code and isinstance(event, dict):
|
||||
err_code = event.get("code")
|
||||
err_param = getattr(event, "param", None)
|
||||
if not err_param and isinstance(event, dict):
|
||||
err_param = event.get("param")
|
||||
err_message = (err_message or "stream emitted error event").strip()
|
||||
from run_agent import _StreamErrorEvent
|
||||
raise _StreamErrorEvent(err_message, code=err_code, param=err_param)
|
||||
|
||||
# Collect output items and text deltas for backfill
|
||||
if event_type == "response.output_item.done":
|
||||
done_item = getattr(event, "item", None)
|
||||
if done_item is None and isinstance(event, dict):
|
||||
done_item = event.get("item")
|
||||
if done_item is not None:
|
||||
collected_output_items.append(done_item)
|
||||
elif event_type in {"response.output_text.delta",}:
|
||||
delta = getattr(event, "delta", "")
|
||||
if not delta and isinstance(event, dict):
|
||||
delta = event.get("delta", "")
|
||||
if delta:
|
||||
collected_text_deltas.append(delta)
|
||||
|
||||
if event_type not in {"response.completed", "response.incomplete", "response.failed"}:
|
||||
continue
|
||||
|
||||
terminal_response = getattr(event, "response", None)
|
||||
if terminal_response is None and isinstance(event, dict):
|
||||
terminal_response = event.get("response")
|
||||
if terminal_response is not None:
|
||||
# Backfill empty output from collected stream events
|
||||
_out = getattr(terminal_response, "output", None)
|
||||
if isinstance(_out, list) and not _out:
|
||||
if collected_output_items:
|
||||
terminal_response.output = list(collected_output_items)
|
||||
logger.debug(
|
||||
"Codex fallback stream: backfilled %d output items",
|
||||
len(collected_output_items),
|
||||
)
|
||||
elif collected_text_deltas:
|
||||
assembled = "".join(collected_text_deltas)
|
||||
terminal_response.output = [SimpleNamespace(
|
||||
type="message", role="assistant",
|
||||
status="completed",
|
||||
content=[SimpleNamespace(type="output_text", text=assembled)],
|
||||
)]
|
||||
logger.debug(
|
||||
"Codex fallback stream: synthesized from %d deltas (%d chars)",
|
||||
len(collected_text_deltas), len(assembled),
|
||||
)
|
||||
return terminal_response
|
||||
finally:
|
||||
close_fn = getattr(stream_or_response, "close", None)
|
||||
if callable(close_fn):
|
||||
try:
|
||||
close_fn()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if terminal_response is not None:
|
||||
return terminal_response
|
||||
raise RuntimeError("Responses create(stream=True) fallback did not emit a terminal response.")
|
||||
|
||||
|
||||
|
||||
__all__ = [
|
||||
"run_codex_app_server_turn",
|
||||
"run_codex_stream",
|
||||
"run_codex_create_stream_fallback",
|
||||
]
|
||||
556
agent/conversation_compression.py
Normal file
556
agent/conversation_compression.py
Normal file
|
|
@ -0,0 +1,556 @@
|
|||
"""Context compression — extract the AIAgent methods that drive summarisation.
|
||||
|
||||
Three concerns live here:
|
||||
|
||||
* :func:`check_compression_model_feasibility` — startup probe of the
|
||||
configured auxiliary compression model. Warns when the aux context
|
||||
window can't fit the main model's compression threshold; auto-lowers
|
||||
the session threshold when possible; hard-rejects auxes below
|
||||
``MINIMUM_CONTEXT_LENGTH``.
|
||||
|
||||
* :func:`replay_compression_warning` — re-emit a stored warning through
|
||||
the gateway ``status_callback`` once it's wired up (the callback is
|
||||
set after :class:`AIAgent` construction).
|
||||
|
||||
* :func:`compress_context` — the actual compression call. Runs the
|
||||
configured compressor, splits the SQLite session, rotates the
|
||||
session_id, notifies plugin context engines / memory providers, and
|
||||
returns the compressed message list and freshly-built system prompt.
|
||||
|
||||
* :func:`try_shrink_image_parts_in_messages` — image-too-large recovery
|
||||
helper that re-encodes ``data:image/...;base64,...`` parts at a smaller
|
||||
size so retries can fit under provider ceilings (Anthropic's 5 MB).
|
||||
|
||||
``run_agent`` keeps thin wrappers for each so existing call sites
|
||||
(``self._compress_context(...)``) keep working. Tests that exercise
|
||||
these paths see no behavioural change.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, List, Optional, Tuple
|
||||
|
||||
from agent.model_metadata import estimate_request_tokens_rough
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def check_compression_model_feasibility(agent: Any) -> None:
|
||||
"""Warn at session start if the auxiliary compression model's context
|
||||
window is smaller than the main model's compression threshold.
|
||||
|
||||
When the auxiliary model cannot fit the content that needs summarising,
|
||||
compression will either fail outright (the LLM call errors) or produce
|
||||
a severely truncated summary.
|
||||
|
||||
Called during ``AIAgent.__init__`` so CLI users see the warning
|
||||
immediately (via ``_vprint``). The gateway sets ``status_callback``
|
||||
*after* construction, so :func:`replay_compression_warning` re-sends
|
||||
the stored warning through the callback on the first
|
||||
``run_conversation()`` call.
|
||||
"""
|
||||
if not agent.compression_enabled:
|
||||
return
|
||||
try:
|
||||
from agent.auxiliary_client import (
|
||||
_resolve_task_provider_model,
|
||||
get_text_auxiliary_client,
|
||||
)
|
||||
from agent.model_metadata import (
|
||||
MINIMUM_CONTEXT_LENGTH,
|
||||
get_model_context_length,
|
||||
)
|
||||
|
||||
client, aux_model = get_text_auxiliary_client(
|
||||
"compression",
|
||||
main_runtime=agent._current_main_runtime(),
|
||||
)
|
||||
# Best-effort aux provider label for the warning message. The
|
||||
# configured provider may be "auto", in which case we fall back
|
||||
# to the client's base_url hostname so the user can still tell
|
||||
# where the compression model is actually being called.
|
||||
try:
|
||||
_aux_cfg_provider, _, _, _, _ = _resolve_task_provider_model("compression")
|
||||
except Exception:
|
||||
_aux_cfg_provider = ""
|
||||
if client is None or not aux_model:
|
||||
if _aux_cfg_provider and _aux_cfg_provider != "auto":
|
||||
msg = (
|
||||
"⚠ Configured auxiliary compression provider "
|
||||
f"'{_aux_cfg_provider}' is unavailable — context "
|
||||
"compression will drop middle turns without a summary. "
|
||||
"Check auxiliary.compression in config.yaml and "
|
||||
"reauthenticate that provider."
|
||||
)
|
||||
else:
|
||||
msg = (
|
||||
"⚠ No auxiliary LLM provider configured — context "
|
||||
"compression will drop middle turns without a summary. "
|
||||
"Run `hermes setup` or set OPENROUTER_API_KEY."
|
||||
)
|
||||
agent._compression_warning = msg
|
||||
agent._emit_status(msg)
|
||||
logger.warning(
|
||||
"No auxiliary LLM provider for compression — "
|
||||
"summaries will be unavailable."
|
||||
)
|
||||
return
|
||||
|
||||
aux_base_url = str(getattr(client, "base_url", ""))
|
||||
aux_api_key = str(getattr(client, "api_key", ""))
|
||||
|
||||
aux_context = get_model_context_length(
|
||||
aux_model,
|
||||
base_url=aux_base_url,
|
||||
api_key=aux_api_key,
|
||||
config_context_length=getattr(agent, "_aux_compression_context_length_config", None),
|
||||
# Each model must be resolved with its own provider so that
|
||||
# provider-specific paths (e.g. Bedrock static table, OpenRouter API)
|
||||
# are invoked for the correct client, not inherited from the main model.
|
||||
provider=(_aux_cfg_provider if _aux_cfg_provider and _aux_cfg_provider != "auto" else getattr(agent, "provider", "")),
|
||||
custom_providers=agent._custom_providers,
|
||||
)
|
||||
|
||||
# Hard floor: the auxiliary compression model must have at least
|
||||
# MINIMUM_CONTEXT_LENGTH (64K) tokens of context. The main model
|
||||
# is already required to meet this floor (checked earlier in
|
||||
# __init__), so the compression model must too — otherwise it
|
||||
# cannot summarise a full threshold-sized window of main-model
|
||||
# content. Mirrors the main-model rejection pattern.
|
||||
if aux_context and aux_context < MINIMUM_CONTEXT_LENGTH:
|
||||
raise ValueError(
|
||||
f"Auxiliary compression model {aux_model} has a context "
|
||||
f"window of {aux_context:,} tokens, which is below the "
|
||||
f"minimum {MINIMUM_CONTEXT_LENGTH:,} required by Hermes "
|
||||
f"Agent. Choose a compression model with at least "
|
||||
f"{MINIMUM_CONTEXT_LENGTH // 1000}K context (set "
|
||||
f"auxiliary.compression.model in config.yaml), or set "
|
||||
f"auxiliary.compression.context_length to override the "
|
||||
f"detected value if it is wrong."
|
||||
)
|
||||
|
||||
threshold = agent.context_compressor.threshold_tokens
|
||||
if aux_context < threshold:
|
||||
# Auto-correct: lower the live session threshold so
|
||||
# compression actually works this session. The hard floor
|
||||
# above guarantees aux_context >= MINIMUM_CONTEXT_LENGTH,
|
||||
# so the new threshold is always >= 64K.
|
||||
#
|
||||
# The compression summariser sends a single user-role
|
||||
# prompt (no system prompt, no tools) to the aux model, so
|
||||
# new_threshold == aux_context is safe: the request is
|
||||
# the raw messages plus a small summarisation instruction.
|
||||
old_threshold = threshold
|
||||
new_threshold = aux_context
|
||||
agent.context_compressor.threshold_tokens = new_threshold
|
||||
# Keep threshold_percent in sync so future main-model
|
||||
# context_length changes (update_model) re-derive from a
|
||||
# sensible number rather than the original too-high value.
|
||||
main_ctx = agent.context_compressor.context_length
|
||||
if main_ctx:
|
||||
agent.context_compressor.threshold_percent = (
|
||||
new_threshold / main_ctx
|
||||
)
|
||||
safe_pct = int((aux_context / main_ctx) * 100) if main_ctx else 50
|
||||
# Build human-readable "model (provider)" labels for both
|
||||
# the main model and the compression model so users can
|
||||
# tell at a glance which provider each side is actually
|
||||
# using. When the configured provider is empty or "auto",
|
||||
# fall back to the client's base_url hostname.
|
||||
_main_model = getattr(agent, "model", "") or "?"
|
||||
_main_provider = getattr(agent, "provider", "") or ""
|
||||
_aux_provider_label = (
|
||||
_aux_cfg_provider
|
||||
if _aux_cfg_provider and _aux_cfg_provider != "auto"
|
||||
else ""
|
||||
)
|
||||
if not _aux_provider_label:
|
||||
try:
|
||||
from urllib.parse import urlparse
|
||||
_aux_provider_label = (
|
||||
urlparse(aux_base_url).hostname or aux_base_url
|
||||
)
|
||||
except Exception:
|
||||
_aux_provider_label = aux_base_url or "auto"
|
||||
_main_label = (
|
||||
f"{_main_model} ({_main_provider})"
|
||||
if _main_provider
|
||||
else _main_model
|
||||
)
|
||||
_aux_label = f"{aux_model} ({_aux_provider_label})"
|
||||
msg = (
|
||||
f"⚠ Compression model {_aux_label} context is "
|
||||
f"{aux_context:,} tokens, but the main model "
|
||||
f"{_main_label}'s compression threshold was "
|
||||
f"{old_threshold:,} tokens. "
|
||||
f"Auto-lowered this session's threshold to "
|
||||
f"{new_threshold:,} tokens so compression can run.\n"
|
||||
f" To make this permanent, edit config.yaml — either:\n"
|
||||
f" 1. Use a larger compression model:\n"
|
||||
f" auxiliary:\n"
|
||||
f" compression:\n"
|
||||
f" model: <model-with-{old_threshold:,}+-context>\n"
|
||||
f" 2. Lower the compression threshold:\n"
|
||||
f" compression:\n"
|
||||
f" threshold: 0.{safe_pct:02d}"
|
||||
)
|
||||
agent._compression_warning = msg
|
||||
agent._emit_status(msg)
|
||||
logger.warning(
|
||||
"Auxiliary compression model %s has %d token context, "
|
||||
"below the main model's compression threshold of %d "
|
||||
"tokens — auto-lowered session threshold to %d to "
|
||||
"keep compression working.",
|
||||
aux_model,
|
||||
aux_context,
|
||||
old_threshold,
|
||||
new_threshold,
|
||||
)
|
||||
except ValueError:
|
||||
# Hard rejections (aux below minimum context) must propagate
|
||||
# so the session refuses to start.
|
||||
raise
|
||||
except Exception as exc:
|
||||
logger.debug(
|
||||
"Compression feasibility check failed (non-fatal): %s", exc
|
||||
)
|
||||
|
||||
|
||||
def replay_compression_warning(agent: Any) -> None:
|
||||
"""Re-send the compression warning through ``status_callback``.
|
||||
|
||||
During ``__init__`` the gateway's ``status_callback`` is not yet
|
||||
wired, so ``_emit_status`` only reaches ``_vprint`` (CLI). This
|
||||
method is called once at the start of the first
|
||||
``run_conversation()`` — by then the gateway has set the callback,
|
||||
so every platform (Telegram, Discord, Slack, etc.) receives the
|
||||
warning.
|
||||
"""
|
||||
msg = getattr(agent, "_compression_warning", None)
|
||||
if msg and agent.status_callback:
|
||||
try:
|
||||
agent.status_callback("lifecycle", msg)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def compress_context(
|
||||
agent: Any,
|
||||
messages: list,
|
||||
system_message: str,
|
||||
*,
|
||||
approx_tokens: Optional[int] = None,
|
||||
task_id: str = "default",
|
||||
focus_topic: Optional[str] = None,
|
||||
) -> Tuple[list, str]:
|
||||
"""Compress conversation context and split the session in SQLite.
|
||||
|
||||
Args:
|
||||
agent: The owning :class:`AIAgent`.
|
||||
messages: Current message history (will be summarised).
|
||||
system_message: Current system prompt; rebuilt after compression.
|
||||
approx_tokens: Pre-compression token estimate, logged for ops.
|
||||
task_id: Tool task scope (used for clearing file-read dedup state).
|
||||
focus_topic: Optional focus string for guided compression — the
|
||||
summariser will prioritise preserving information related to
|
||||
this topic. Inspired by Claude Code's ``/compact <focus>``.
|
||||
|
||||
Returns:
|
||||
``(compressed_messages, new_system_prompt)`` tuple.
|
||||
"""
|
||||
_pre_msg_count = len(messages)
|
||||
logger.info(
|
||||
"context compression started: session=%s messages=%d tokens=~%s model=%s focus=%r",
|
||||
agent.session_id or "none", _pre_msg_count,
|
||||
f"{approx_tokens:,}" if approx_tokens else "unknown", agent.model,
|
||||
focus_topic,
|
||||
)
|
||||
agent._emit_status(
|
||||
"🗜️ Compacting context — summarizing earlier conversation so I can continue..."
|
||||
)
|
||||
|
||||
# Notify external memory provider before compression discards context
|
||||
if agent._memory_manager:
|
||||
try:
|
||||
agent._memory_manager.on_pre_compress(messages)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
compressed = agent.context_compressor.compress(messages, current_tokens=approx_tokens, focus_topic=focus_topic)
|
||||
except TypeError:
|
||||
# Plugin context engine with strict signature that doesn't accept
|
||||
# focus_topic — fall back to calling without it.
|
||||
compressed = agent.context_compressor.compress(messages, current_tokens=approx_tokens)
|
||||
|
||||
summary_error = getattr(agent.context_compressor, "_last_summary_error", None)
|
||||
if summary_error:
|
||||
if getattr(agent, "_last_compression_summary_warning", None) != summary_error:
|
||||
agent._last_compression_summary_warning = summary_error
|
||||
agent._emit_warning(
|
||||
f"⚠ Compression summary failed: {summary_error}. "
|
||||
"Inserted a fallback context marker."
|
||||
)
|
||||
else:
|
||||
# No hard failure — but did the configured aux model error out
|
||||
# and get recovered by retrying on main? Surface that so users
|
||||
# know their auxiliary.compression.model setting is broken even
|
||||
# though compression succeeded.
|
||||
_aux_fail_model = getattr(agent.context_compressor, "_last_aux_model_failure_model", None)
|
||||
_aux_fail_err = getattr(agent.context_compressor, "_last_aux_model_failure_error", None)
|
||||
if _aux_fail_model:
|
||||
# Dedup on (model, error) so we don't spam on every compaction
|
||||
_aux_key = (_aux_fail_model, _aux_fail_err)
|
||||
if getattr(agent, "_last_aux_fallback_warning_key", None) != _aux_key:
|
||||
agent._last_aux_fallback_warning_key = _aux_key
|
||||
agent._emit_warning(
|
||||
f"ℹ Configured compression model '{_aux_fail_model}' failed "
|
||||
f"({_aux_fail_err or 'unknown error'}). Recovered using main model — "
|
||||
"check auxiliary.compression.model in config.yaml."
|
||||
)
|
||||
|
||||
todo_snapshot = agent._todo_store.format_for_injection()
|
||||
if todo_snapshot:
|
||||
compressed.append({"role": "user", "content": todo_snapshot})
|
||||
|
||||
agent._invalidate_system_prompt()
|
||||
new_system_prompt = agent._build_system_prompt(system_message)
|
||||
agent._cached_system_prompt = new_system_prompt
|
||||
|
||||
if agent._session_db:
|
||||
try:
|
||||
# Propagate title to the new session with auto-numbering
|
||||
old_title = agent._session_db.get_session_title(agent.session_id)
|
||||
# Trigger memory extraction on the old session before it rotates.
|
||||
agent.commit_memory_session(messages)
|
||||
agent._session_db.end_session(agent.session_id, "compression")
|
||||
old_session_id = agent.session_id
|
||||
agent.session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
|
||||
os.environ["HERMES_SESSION_ID"] = agent.session_id
|
||||
try:
|
||||
from gateway.session_context import _SESSION_ID
|
||||
_SESSION_ID.set(agent.session_id)
|
||||
except Exception:
|
||||
pass
|
||||
# Update session_log_file to point to the new session's JSON file
|
||||
agent.session_log_file = agent.logs_dir / f"session_{agent.session_id}.json"
|
||||
agent._session_db_created = False
|
||||
agent._session_db.create_session(
|
||||
session_id=agent.session_id,
|
||||
source=agent.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli"),
|
||||
model=agent.model,
|
||||
model_config=agent._session_init_model_config,
|
||||
parent_session_id=old_session_id,
|
||||
)
|
||||
agent._session_db_created = True
|
||||
# Auto-number the title for the continuation session
|
||||
if old_title:
|
||||
try:
|
||||
new_title = agent._session_db.get_next_title_in_lineage(old_title)
|
||||
agent._session_db.set_session_title(agent.session_id, new_title)
|
||||
except (ValueError, Exception) as e:
|
||||
logger.debug("Could not propagate title on compression: %s", e)
|
||||
agent._session_db.update_system_prompt(agent.session_id, new_system_prompt)
|
||||
# Reset flush cursor — new session starts with no messages written
|
||||
agent._last_flushed_db_idx = 0
|
||||
except Exception as e:
|
||||
logger.warning("Session DB compression split failed — new session will NOT be indexed: %s", e)
|
||||
|
||||
# Notify the context engine that the session_id rotated because of
|
||||
# compression (not a fresh /new). Plugin engines (e.g. hermes-lcm) use
|
||||
# boundary_reason="compression" to preserve DAG lineage across the
|
||||
# rollover instead of re-initializing fresh per-session state.
|
||||
# See hermes-lcm#68. Built-in ContextCompressor ignores kwargs.
|
||||
try:
|
||||
_old_sid = locals().get("old_session_id")
|
||||
if _old_sid and hasattr(agent.context_compressor, "on_session_start"):
|
||||
agent.context_compressor.on_session_start(
|
||||
agent.session_id or "",
|
||||
boundary_reason="compression",
|
||||
old_session_id=_old_sid,
|
||||
)
|
||||
except Exception as _ce_err:
|
||||
logger.debug("context engine on_session_start (compression): %s", _ce_err)
|
||||
|
||||
# Notify memory providers of the compression-driven session_id rotation
|
||||
# so provider-cached per-session state (Hindsight's _document_id,
|
||||
# accumulated turn buffers, counters) refreshes. reset=False because
|
||||
# the logical conversation continues; only the id and DB row rolled
|
||||
# over. See #6672.
|
||||
try:
|
||||
_old_sid = locals().get("old_session_id")
|
||||
if _old_sid and agent._memory_manager:
|
||||
agent._memory_manager.on_session_switch(
|
||||
agent.session_id or "",
|
||||
parent_session_id=_old_sid,
|
||||
reset=False,
|
||||
reason="compression",
|
||||
)
|
||||
except Exception as _me_err:
|
||||
logger.debug("memory manager on_session_switch (compression): %s", _me_err)
|
||||
|
||||
# Warn on repeated compressions (quality degrades with each pass)
|
||||
_cc = agent.context_compressor.compression_count
|
||||
if _cc >= 2:
|
||||
agent._vprint(
|
||||
f"{agent.log_prefix}⚠️ Session compressed {_cc} times — "
|
||||
f"accuracy may degrade. Consider /new to start fresh.",
|
||||
force=True,
|
||||
)
|
||||
|
||||
# Update token estimate after compaction so pressure calculations
|
||||
# use the post-compression count, not the stale pre-compression one.
|
||||
# Use estimate_request_tokens_rough() so tool schemas are included —
|
||||
# with 50+ tools enabled, schemas alone can add 20-30K tokens, and
|
||||
# omitting them delays the next compression cycle far past the
|
||||
# configured threshold (issue #14695).
|
||||
_compressed_est = estimate_request_tokens_rough(
|
||||
compressed,
|
||||
system_prompt=new_system_prompt or "",
|
||||
tools=agent.tools or None,
|
||||
)
|
||||
agent.context_compressor.last_prompt_tokens = _compressed_est
|
||||
agent.context_compressor.last_completion_tokens = 0
|
||||
|
||||
# Clear the file-read dedup cache. After compression the original
|
||||
# read content is summarised away — if the model re-reads the same
|
||||
# file it needs the full content, not a "file unchanged" stub.
|
||||
try:
|
||||
from tools.file_tools import reset_file_dedup
|
||||
reset_file_dedup(task_id)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
logger.info(
|
||||
"context compression done: session=%s messages=%d->%d tokens=~%s",
|
||||
agent.session_id or "none", _pre_msg_count, len(compressed),
|
||||
f"{_compressed_est:,}",
|
||||
)
|
||||
return compressed, new_system_prompt
|
||||
|
||||
|
||||
def try_shrink_image_parts_in_messages(api_messages: list) -> bool:
|
||||
"""Re-encode all native image parts at a smaller size to recover from
|
||||
image-too-large errors (Anthropic 5 MB, unknown other providers).
|
||||
|
||||
Mutates ``api_messages`` in place. Returns True if any image part was
|
||||
actually replaced, False if there were no image parts to shrink or
|
||||
Pillow couldn't help (caller should surface the original error).
|
||||
|
||||
Strategy: look for ``image_url`` / ``input_image`` parts carrying a
|
||||
``data:image/...;base64,...`` payload. For each one whose encoded
|
||||
size exceeds 4 MB (a safe target that slides under Anthropic's 5 MB
|
||||
ceiling with header overhead), write the base64 to a tempfile, call
|
||||
``vision_tools._resize_image_for_vision`` to produce a smaller data
|
||||
URL, and substitute it in place.
|
||||
|
||||
Non-data-URL images (http/https URLs) are not touched — the provider
|
||||
fetches those itself and the size limit is different.
|
||||
"""
|
||||
if not api_messages:
|
||||
return False
|
||||
|
||||
try:
|
||||
from tools.vision_tools import _resize_image_for_vision
|
||||
except Exception as exc:
|
||||
logger.warning("image-shrink recovery: vision_tools unavailable — %s", exc)
|
||||
return False
|
||||
|
||||
# 4 MB target leaves comfortable headroom under Anthropic's 5 MB.
|
||||
# Non-Anthropic providers we haven't observed rejecting are fine with
|
||||
# much larger; shrinking to 4 MB here loses quality but only fires
|
||||
# after a confirmed provider rejection, so the alternative is failure.
|
||||
target_bytes = 4 * 1024 * 1024
|
||||
changed_count = 0
|
||||
|
||||
def _shrink_data_url(url: str) -> Optional[str]:
|
||||
"""Return a smaller data URL, or None if shrink can't help."""
|
||||
if not isinstance(url, str) or not url.startswith("data:"):
|
||||
return None
|
||||
if len(url) <= target_bytes:
|
||||
# This specific image wasn't the oversized one.
|
||||
return None
|
||||
try:
|
||||
header, _, data = url.partition(",")
|
||||
mime = "image/jpeg"
|
||||
if header.startswith("data:"):
|
||||
mime_part = header[len("data:"):].split(";", 1)[0].strip()
|
||||
if mime_part.startswith("image/"):
|
||||
mime = mime_part
|
||||
import base64 as _b64
|
||||
raw = _b64.b64decode(data)
|
||||
suffix = {
|
||||
"image/png": ".png", "image/gif": ".gif", "image/webp": ".webp",
|
||||
"image/jpeg": ".jpg", "image/jpg": ".jpg", "image/bmp": ".bmp",
|
||||
}.get(mime, ".jpg")
|
||||
tmp = tempfile.NamedTemporaryFile(
|
||||
prefix="hermes_shrink_", suffix=suffix, delete=False,
|
||||
)
|
||||
try:
|
||||
tmp.write(raw)
|
||||
tmp.close()
|
||||
resized = _resize_image_for_vision(
|
||||
Path(tmp.name),
|
||||
mime_type=mime,
|
||||
max_base64_bytes=target_bytes,
|
||||
)
|
||||
finally:
|
||||
try:
|
||||
Path(tmp.name).unlink(missing_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
if not resized or len(resized) >= len(url):
|
||||
# Shrink didn't help (or made it bigger — corrupt input?).
|
||||
return None
|
||||
return resized
|
||||
except Exception as exc:
|
||||
logger.warning("image-shrink recovery: re-encode failed — %s", exc)
|
||||
return None
|
||||
|
||||
for msg in api_messages:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
content = msg.get("content")
|
||||
if not isinstance(content, list):
|
||||
continue
|
||||
for part in content:
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
ptype = part.get("type")
|
||||
if ptype not in {"image_url", "input_image"}:
|
||||
continue
|
||||
image_value = part.get("image_url")
|
||||
# OpenAI chat.completions: {"image_url": {"url": "data:..."}}
|
||||
# OpenAI Responses: {"image_url": "data:..."}
|
||||
if isinstance(image_value, dict):
|
||||
url = image_value.get("url", "")
|
||||
resized = _shrink_data_url(url)
|
||||
if resized:
|
||||
image_value["url"] = resized
|
||||
changed_count += 1
|
||||
elif isinstance(image_value, str):
|
||||
resized = _shrink_data_url(image_value)
|
||||
if resized:
|
||||
part["image_url"] = resized
|
||||
changed_count += 1
|
||||
|
||||
if changed_count:
|
||||
logger.info(
|
||||
"image-shrink recovery: re-encoded %d image part(s) to fit under %.0f MB",
|
||||
changed_count, target_bytes / (1024 * 1024),
|
||||
)
|
||||
return changed_count > 0
|
||||
|
||||
|
||||
__all__ = [
|
||||
"check_compression_model_feasibility",
|
||||
"replay_compression_warning",
|
||||
"compress_context",
|
||||
"try_shrink_image_parts_in_messages",
|
||||
]
|
||||
4018
agent/conversation_loop.py
Normal file
4018
agent/conversation_loop.py
Normal file
File diff suppressed because it is too large
Load diff
62
agent/iteration_budget.py
Normal file
62
agent/iteration_budget.py
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
"""Per-agent iteration budget — thread-safe consume/refund counter.
|
||||
|
||||
Extracted from ``run_agent.py``. Each ``AIAgent`` instance (parent or
|
||||
subagent) holds an :class:`IterationBudget`; the parent's cap comes from
|
||||
``max_iterations`` (default 90), each subagent's cap comes from
|
||||
``delegation.max_iterations`` (default 50).
|
||||
|
||||
``run_agent`` re-exports ``IterationBudget`` so existing
|
||||
``from run_agent import IterationBudget`` imports keep working unchanged.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
|
||||
|
||||
class IterationBudget:
|
||||
"""Thread-safe iteration counter for an agent.
|
||||
|
||||
Each agent (parent or subagent) gets its own ``IterationBudget``.
|
||||
The parent's budget is capped at ``max_iterations`` (default 90).
|
||||
Each subagent gets an independent budget capped at
|
||||
``delegation.max_iterations`` (default 50) — this means total
|
||||
iterations across parent + subagents can exceed the parent's cap.
|
||||
Users control the per-subagent limit via ``delegation.max_iterations``
|
||||
in config.yaml.
|
||||
|
||||
``execute_code`` (programmatic tool calling) iterations are refunded via
|
||||
:meth:`refund` so they don't eat into the budget.
|
||||
"""
|
||||
|
||||
def __init__(self, max_total: int):
|
||||
self.max_total = max_total
|
||||
self._used = 0
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def consume(self) -> bool:
|
||||
"""Try to consume one iteration. Returns True if allowed."""
|
||||
with self._lock:
|
||||
if self._used >= self.max_total:
|
||||
return False
|
||||
self._used += 1
|
||||
return True
|
||||
|
||||
def refund(self) -> None:
|
||||
"""Give back one iteration (e.g. for execute_code turns)."""
|
||||
with self._lock:
|
||||
if self._used > 0:
|
||||
self._used -= 1
|
||||
|
||||
@property
|
||||
def used(self) -> int:
|
||||
with self._lock:
|
||||
return self._used
|
||||
|
||||
@property
|
||||
def remaining(self) -> int:
|
||||
with self._lock:
|
||||
return max(0, self.max_total - self._used)
|
||||
|
||||
|
||||
__all__ = ["IterationBudget"]
|
||||
444
agent/message_sanitization.py
Normal file
444
agent/message_sanitization.py
Normal file
|
|
@ -0,0 +1,444 @@
|
|||
"""Message and tool-payload sanitization helpers.
|
||||
|
||||
Pure functions extracted from ``run_agent.py`` so the AIAgent module can
|
||||
stay focused on the conversation loop. These walk OpenAI-format message
|
||||
lists and structured payloads, repairing or stripping problematic
|
||||
characters that would otherwise crash ``json.dumps`` inside the OpenAI
|
||||
SDK or be rejected by upstream APIs.
|
||||
|
||||
All helpers are stateless and side-effect-free except for in-place
|
||||
mutation of their input (where documented). Backward-compatible
|
||||
re-exports from ``run_agent`` remain in place so existing imports
|
||||
``from run_agent import _sanitize_surrogates`` keep working.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Lone surrogate code points are invalid in UTF-8 and crash json.dumps
|
||||
# inside the OpenAI SDK. Used by every surrogate-sanitization helper
|
||||
# below as well as by run_agent and the CLI for paste-from-clipboard
|
||||
# scrubbing.
|
||||
_SURROGATE_RE = re.compile(r'[\ud800-\udfff]')
|
||||
|
||||
|
||||
def _sanitize_surrogates(text: str) -> str:
|
||||
"""Replace lone surrogate code points with U+FFFD (replacement character).
|
||||
|
||||
Surrogates are invalid in UTF-8 and will crash ``json.dumps()`` inside the
|
||||
OpenAI SDK. This is a fast no-op when the text contains no surrogates.
|
||||
"""
|
||||
if _SURROGATE_RE.search(text):
|
||||
return _SURROGATE_RE.sub('\ufffd', text)
|
||||
return text
|
||||
|
||||
|
||||
def _sanitize_structure_surrogates(payload: Any) -> bool:
|
||||
"""Replace surrogate code points in nested dict/list payloads in-place.
|
||||
|
||||
Mirror of ``_sanitize_structure_non_ascii`` but for surrogate recovery.
|
||||
Used to scrub nested structured fields (e.g. ``reasoning_details`` — an
|
||||
array of dicts with ``summary``/``text`` strings) that flat per-field
|
||||
checks don't reach. Returns True if any surrogates were replaced.
|
||||
"""
|
||||
found = False
|
||||
|
||||
def _walk(node):
|
||||
nonlocal found
|
||||
if isinstance(node, dict):
|
||||
for key, value in node.items():
|
||||
if isinstance(value, str):
|
||||
if _SURROGATE_RE.search(value):
|
||||
node[key] = _SURROGATE_RE.sub('\ufffd', value)
|
||||
found = True
|
||||
elif isinstance(value, (dict, list)):
|
||||
_walk(value)
|
||||
elif isinstance(node, list):
|
||||
for idx, value in enumerate(node):
|
||||
if isinstance(value, str):
|
||||
if _SURROGATE_RE.search(value):
|
||||
node[idx] = _SURROGATE_RE.sub('\ufffd', value)
|
||||
found = True
|
||||
elif isinstance(value, (dict, list)):
|
||||
_walk(value)
|
||||
|
||||
_walk(payload)
|
||||
return found
|
||||
|
||||
|
||||
def _sanitize_messages_surrogates(messages: list) -> bool:
|
||||
"""Sanitize surrogate characters from all string content in a messages list.
|
||||
|
||||
Walks message dicts in-place. Returns True if any surrogates were found
|
||||
and replaced, False otherwise. Covers content/text, name, tool call
|
||||
metadata/arguments, AND any additional string or nested structured fields
|
||||
(``reasoning``, ``reasoning_content``, ``reasoning_details``, etc.) so
|
||||
retries don't fail on a non-content field. Byte-level reasoning models
|
||||
(xiaomi/mimo, kimi, glm) can emit lone surrogates in reasoning output
|
||||
that flow through to ``api_messages["reasoning_content"]`` on the next
|
||||
turn and crash json.dumps inside the OpenAI SDK.
|
||||
"""
|
||||
found = False
|
||||
for msg in messages:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
content = msg.get("content")
|
||||
if isinstance(content, str) and _SURROGATE_RE.search(content):
|
||||
msg["content"] = _SURROGATE_RE.sub('\ufffd', content)
|
||||
found = True
|
||||
elif isinstance(content, list):
|
||||
for part in content:
|
||||
if isinstance(part, dict):
|
||||
text = part.get("text")
|
||||
if isinstance(text, str) and _SURROGATE_RE.search(text):
|
||||
part["text"] = _SURROGATE_RE.sub('\ufffd', text)
|
||||
found = True
|
||||
name = msg.get("name")
|
||||
if isinstance(name, str) and _SURROGATE_RE.search(name):
|
||||
msg["name"] = _SURROGATE_RE.sub('\ufffd', name)
|
||||
found = True
|
||||
tool_calls = msg.get("tool_calls")
|
||||
if isinstance(tool_calls, list):
|
||||
for tc in tool_calls:
|
||||
if not isinstance(tc, dict):
|
||||
continue
|
||||
tc_id = tc.get("id")
|
||||
if isinstance(tc_id, str) and _SURROGATE_RE.search(tc_id):
|
||||
tc["id"] = _SURROGATE_RE.sub('\ufffd', tc_id)
|
||||
found = True
|
||||
fn = tc.get("function")
|
||||
if isinstance(fn, dict):
|
||||
fn_name = fn.get("name")
|
||||
if isinstance(fn_name, str) and _SURROGATE_RE.search(fn_name):
|
||||
fn["name"] = _SURROGATE_RE.sub('\ufffd', fn_name)
|
||||
found = True
|
||||
fn_args = fn.get("arguments")
|
||||
if isinstance(fn_args, str) and _SURROGATE_RE.search(fn_args):
|
||||
fn["arguments"] = _SURROGATE_RE.sub('\ufffd', fn_args)
|
||||
found = True
|
||||
# Walk any additional string / nested fields (reasoning,
|
||||
# reasoning_content, reasoning_details, etc.) — surrogates from
|
||||
# byte-level reasoning models (xiaomi/mimo, kimi, glm) can lurk
|
||||
# in these fields and aren't covered by the per-field checks above.
|
||||
# Matches _sanitize_messages_non_ascii's coverage (PR #10537).
|
||||
for key, value in msg.items():
|
||||
if key in {"content", "name", "tool_calls", "role"}:
|
||||
continue
|
||||
if isinstance(value, str):
|
||||
if _SURROGATE_RE.search(value):
|
||||
msg[key] = _SURROGATE_RE.sub('\ufffd', value)
|
||||
found = True
|
||||
elif isinstance(value, (dict, list)):
|
||||
if _sanitize_structure_surrogates(value):
|
||||
found = True
|
||||
return found
|
||||
|
||||
|
||||
def _escape_invalid_chars_in_json_strings(raw: str) -> str:
|
||||
"""Escape unescaped control chars inside JSON string values.
|
||||
|
||||
Walks the raw JSON character-by-character, tracking whether we are
|
||||
inside a double-quoted string. Inside strings, replaces literal
|
||||
control characters (0x00-0x1F) that aren't already part of an escape
|
||||
sequence with their ``\\uXXXX`` equivalents. Pass-through for everything
|
||||
else.
|
||||
|
||||
Ported from #12093 — complements the other repair passes in
|
||||
``_repair_tool_call_arguments`` when ``json.loads(strict=False)`` is
|
||||
not enough (e.g. llama.cpp backends that emit literal apostrophes or
|
||||
tabs alongside other malformations).
|
||||
"""
|
||||
out: list[str] = []
|
||||
in_string = False
|
||||
i = 0
|
||||
n = len(raw)
|
||||
while i < n:
|
||||
ch = raw[i]
|
||||
if in_string:
|
||||
if ch == "\\" and i + 1 < n:
|
||||
# Already-escaped char — pass through as-is
|
||||
out.append(ch)
|
||||
out.append(raw[i + 1])
|
||||
i += 2
|
||||
continue
|
||||
if ch == '"':
|
||||
in_string = False
|
||||
out.append(ch)
|
||||
elif ord(ch) < 0x20:
|
||||
out.append(f"\\u{ord(ch):04x}")
|
||||
else:
|
||||
out.append(ch)
|
||||
else:
|
||||
if ch == '"':
|
||||
in_string = True
|
||||
out.append(ch)
|
||||
i += 1
|
||||
return "".join(out)
|
||||
|
||||
|
||||
def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str:
|
||||
"""Attempt to repair malformed tool_call argument JSON.
|
||||
|
||||
Models like GLM-5.1 via Ollama can produce truncated JSON, trailing
|
||||
commas, Python ``None``, etc. The API proxy rejects these with HTTP 400
|
||||
"invalid tool call arguments". This function applies common repairs;
|
||||
if all fail it returns ``"{}"`` so the request succeeds (better than
|
||||
crashing the session). All repairs are logged at WARNING level.
|
||||
"""
|
||||
raw_stripped = raw_args.strip() if isinstance(raw_args, str) else ""
|
||||
|
||||
# Fast-path: empty / whitespace-only -> empty object
|
||||
if not raw_stripped:
|
||||
logger.warning("Sanitized empty tool_call arguments for %s", tool_name)
|
||||
return "{}"
|
||||
|
||||
# Python-literal None -> normalise to {}
|
||||
if raw_stripped == "None":
|
||||
logger.warning("Sanitized Python-None tool_call arguments for %s", tool_name)
|
||||
return "{}"
|
||||
|
||||
# Repair pass 0: llama.cpp backends sometimes emit literal control
|
||||
# characters (tabs, newlines) inside JSON string values. json.loads
|
||||
# with strict=False accepts these and lets us re-serialise the
|
||||
# result into wire-valid JSON without any string surgery. This is
|
||||
# the most common local-model repair case (#12068).
|
||||
try:
|
||||
parsed = json.loads(raw_stripped, strict=False)
|
||||
reserialised = json.dumps(parsed, separators=(",", ":"))
|
||||
if reserialised != raw_stripped:
|
||||
logger.warning(
|
||||
"Repaired unescaped control chars in tool_call arguments for %s",
|
||||
tool_name,
|
||||
)
|
||||
return reserialised
|
||||
except (json.JSONDecodeError, TypeError, ValueError):
|
||||
pass
|
||||
|
||||
# Attempt common JSON repairs
|
||||
fixed = raw_stripped
|
||||
# 1. Strip trailing commas before } or ]
|
||||
fixed = re.sub(r',\s*([}\]])', r'\1', fixed)
|
||||
# 2. Close unclosed structures
|
||||
open_curly = fixed.count('{') - fixed.count('}')
|
||||
open_bracket = fixed.count('[') - fixed.count(']')
|
||||
if open_curly > 0:
|
||||
fixed += '}' * open_curly
|
||||
if open_bracket > 0:
|
||||
fixed += ']' * open_bracket
|
||||
# 3. Remove excess closing braces/brackets (bounded to 50 iterations)
|
||||
for _ in range(50):
|
||||
try:
|
||||
json.loads(fixed)
|
||||
break
|
||||
except json.JSONDecodeError:
|
||||
if fixed.endswith('}') and fixed.count('}') > fixed.count('{'):
|
||||
fixed = fixed[:-1]
|
||||
elif fixed.endswith(']') and fixed.count(']') > fixed.count('['):
|
||||
fixed = fixed[:-1]
|
||||
else:
|
||||
break
|
||||
|
||||
try:
|
||||
json.loads(fixed)
|
||||
logger.warning(
|
||||
"Repaired malformed tool_call arguments for %s: %s → %s",
|
||||
tool_name, raw_stripped[:80], fixed[:80],
|
||||
)
|
||||
return fixed
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# Repair pass 4: escape unescaped control chars inside JSON strings,
|
||||
# then retry. Catches cases where strict=False alone fails because
|
||||
# other malformations are present too.
|
||||
try:
|
||||
escaped = _escape_invalid_chars_in_json_strings(fixed)
|
||||
if escaped != fixed:
|
||||
json.loads(escaped)
|
||||
logger.warning(
|
||||
"Repaired control-char-laced tool_call arguments for %s: %s → %s",
|
||||
tool_name, raw_stripped[:80], escaped[:80],
|
||||
)
|
||||
return escaped
|
||||
except (json.JSONDecodeError, TypeError, ValueError):
|
||||
pass
|
||||
|
||||
# Last resort: replace with empty object so the API request doesn't
|
||||
# crash the entire session.
|
||||
logger.warning(
|
||||
"Unrepairable tool_call arguments for %s — "
|
||||
"replaced with empty object (was: %s)",
|
||||
tool_name, raw_stripped[:80],
|
||||
)
|
||||
return "{}"
|
||||
|
||||
|
||||
def _strip_non_ascii(text: str) -> str:
|
||||
"""Remove non-ASCII characters, replacing with closest ASCII equivalent or removing.
|
||||
|
||||
Used as a last resort when the system encoding is ASCII and can't handle
|
||||
any non-ASCII characters (e.g. LANG=C on Chromebooks).
|
||||
"""
|
||||
return text.encode('ascii', errors='ignore').decode('ascii')
|
||||
|
||||
|
||||
def _sanitize_messages_non_ascii(messages: list) -> bool:
|
||||
"""Strip non-ASCII characters from all string content in a messages list.
|
||||
|
||||
This is a last-resort recovery for systems with ASCII-only encoding
|
||||
(LANG=C, Chromebooks, minimal containers). Returns True if any
|
||||
non-ASCII content was found and sanitized.
|
||||
"""
|
||||
found = False
|
||||
for msg in messages:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
# Sanitize content (string)
|
||||
content = msg.get("content")
|
||||
if isinstance(content, str):
|
||||
sanitized = _strip_non_ascii(content)
|
||||
if sanitized != content:
|
||||
msg["content"] = sanitized
|
||||
found = True
|
||||
elif isinstance(content, list):
|
||||
for part in content:
|
||||
if isinstance(part, dict):
|
||||
text = part.get("text")
|
||||
if isinstance(text, str):
|
||||
sanitized = _strip_non_ascii(text)
|
||||
if sanitized != text:
|
||||
part["text"] = sanitized
|
||||
found = True
|
||||
# Sanitize name field (can contain non-ASCII in tool results)
|
||||
name = msg.get("name")
|
||||
if isinstance(name, str):
|
||||
sanitized = _strip_non_ascii(name)
|
||||
if sanitized != name:
|
||||
msg["name"] = sanitized
|
||||
found = True
|
||||
# Sanitize tool_calls
|
||||
tool_calls = msg.get("tool_calls")
|
||||
if isinstance(tool_calls, list):
|
||||
for tc in tool_calls:
|
||||
if isinstance(tc, dict):
|
||||
fn = tc.get("function", {})
|
||||
if isinstance(fn, dict):
|
||||
fn_args = fn.get("arguments")
|
||||
if isinstance(fn_args, str):
|
||||
sanitized = _strip_non_ascii(fn_args)
|
||||
if sanitized != fn_args:
|
||||
fn["arguments"] = sanitized
|
||||
found = True
|
||||
# Sanitize any additional top-level string fields (e.g. reasoning_content)
|
||||
for key, value in msg.items():
|
||||
if key in {"content", "name", "tool_calls", "role"}:
|
||||
continue
|
||||
if isinstance(value, str):
|
||||
sanitized = _strip_non_ascii(value)
|
||||
if sanitized != value:
|
||||
msg[key] = sanitized
|
||||
found = True
|
||||
return found
|
||||
|
||||
|
||||
def _sanitize_tools_non_ascii(tools: list) -> bool:
|
||||
"""Strip non-ASCII characters from tool payloads in-place."""
|
||||
return _sanitize_structure_non_ascii(tools)
|
||||
|
||||
|
||||
def _strip_images_from_messages(messages: list) -> bool:
|
||||
"""Remove image_url content parts from all messages in-place.
|
||||
|
||||
Called when a server signals it does not support images (e.g.
|
||||
"Only 'text' content type is supported."). Mutates messages so the
|
||||
next API call sends text only.
|
||||
|
||||
Preserves message alternation invariants:
|
||||
* ``tool``-role messages whose content was entirely images are replaced
|
||||
with a plaintext placeholder, NOT deleted — deleting them would leave
|
||||
the paired ``tool_call_id`` on the prior assistant message unmatched,
|
||||
which providers reject with HTTP 400.
|
||||
* Non-tool messages whose content becomes empty are dropped. In
|
||||
practice this only hits synthetic image-only user messages appended
|
||||
for attachment delivery; real user turns always include text.
|
||||
|
||||
Returns True if any image parts were removed.
|
||||
"""
|
||||
found = False
|
||||
to_delete = []
|
||||
for i, msg in enumerate(messages):
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
content = msg.get("content")
|
||||
if not isinstance(content, list):
|
||||
continue
|
||||
new_parts = []
|
||||
for part in content:
|
||||
if isinstance(part, dict) and part.get("type") in {"image_url", "image", "input_image"}:
|
||||
found = True
|
||||
else:
|
||||
new_parts.append(part)
|
||||
if len(new_parts) < len(content):
|
||||
if new_parts:
|
||||
msg["content"] = new_parts
|
||||
elif msg.get("role") == "tool":
|
||||
# Preserve tool_call_id linkage — providers require every
|
||||
# assistant tool_call to have a matching tool response.
|
||||
msg["content"] = "[image content removed — server does not support images]"
|
||||
else:
|
||||
# Synthetic image-only user/assistant message with no text;
|
||||
# safe to drop.
|
||||
to_delete.append(i)
|
||||
for i in reversed(to_delete):
|
||||
del messages[i]
|
||||
return found
|
||||
|
||||
|
||||
def _sanitize_structure_non_ascii(payload: Any) -> bool:
|
||||
"""Strip non-ASCII characters from nested dict/list payloads in-place."""
|
||||
found = False
|
||||
|
||||
def _walk(node):
|
||||
nonlocal found
|
||||
if isinstance(node, dict):
|
||||
for key, value in node.items():
|
||||
if isinstance(value, str):
|
||||
sanitized = _strip_non_ascii(value)
|
||||
if sanitized != value:
|
||||
node[key] = sanitized
|
||||
found = True
|
||||
elif isinstance(value, (dict, list)):
|
||||
_walk(value)
|
||||
elif isinstance(node, list):
|
||||
for idx, value in enumerate(node):
|
||||
if isinstance(value, str):
|
||||
sanitized = _strip_non_ascii(value)
|
||||
if sanitized != value:
|
||||
node[idx] = sanitized
|
||||
found = True
|
||||
elif isinstance(value, (dict, list)):
|
||||
_walk(value)
|
||||
|
||||
_walk(payload)
|
||||
return found
|
||||
|
||||
|
||||
__all__ = [
|
||||
"_SURROGATE_RE",
|
||||
"_sanitize_surrogates",
|
||||
"_sanitize_structure_surrogates",
|
||||
"_sanitize_messages_surrogates",
|
||||
"_escape_invalid_chars_in_json_strings",
|
||||
"_repair_tool_call_arguments",
|
||||
"_strip_non_ascii",
|
||||
"_sanitize_messages_non_ascii",
|
||||
"_sanitize_tools_non_ascii",
|
||||
"_strip_images_from_messages",
|
||||
"_sanitize_structure_non_ascii",
|
||||
]
|
||||
167
agent/process_bootstrap.py
Normal file
167
agent/process_bootstrap.py
Normal file
|
|
@ -0,0 +1,167 @@
|
|||
"""Process-level bootstrap helpers for ``run_agent``.
|
||||
|
||||
Three concerns, all tied to ``AIAgent`` boot-time / runtime IO setup:
|
||||
|
||||
1. **Lazy OpenAI SDK import** — ``_load_openai_cls`` + ``_OpenAIProxy``
|
||||
defer the 240ms-ish ``from openai import OpenAI`` cost until first use,
|
||||
while preserving ``isinstance(client, OpenAI)`` checks and
|
||||
``patch("run_agent.OpenAI", ...)`` test patterns.
|
||||
|
||||
2. **Crash-resistant stdio** — ``_SafeWriter`` wraps stdout/stderr so
|
||||
``OSError: Input/output error`` from broken pipes (systemd, Docker,
|
||||
thread teardown races) cannot crash the agent. ``_install_safe_stdio``
|
||||
applies the wrapper.
|
||||
|
||||
3. **HTTP proxy resolution** — ``_get_proxy_from_env`` reads
|
||||
``HTTPS_PROXY`` / ``HTTP_PROXY`` / ``ALL_PROXY``;
|
||||
``_get_proxy_for_base_url`` respects ``NO_PROXY`` for the given base URL.
|
||||
|
||||
``run_agent`` re-exports every name so existing
|
||||
``from run_agent import _get_proxy_from_env`` imports keep working
|
||||
unchanged.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import urllib.request
|
||||
from typing import Optional
|
||||
|
||||
from utils import base_url_hostname, normalize_proxy_url
|
||||
|
||||
|
||||
# Cached at module level so we only pay the OpenAI SDK import cost once
|
||||
# per process (after the first lazy load).
|
||||
_OPENAI_CLS_CACHE = None
|
||||
|
||||
|
||||
def _load_openai_cls() -> type:
|
||||
"""Import and cache ``openai.OpenAI``."""
|
||||
global _OPENAI_CLS_CACHE
|
||||
if _OPENAI_CLS_CACHE is None:
|
||||
from openai import OpenAI as _cls
|
||||
_OPENAI_CLS_CACHE = _cls
|
||||
return _OPENAI_CLS_CACHE
|
||||
|
||||
|
||||
class _OpenAIProxy:
|
||||
"""Module-level proxy that looks like ``openai.OpenAI`` but imports lazily."""
|
||||
|
||||
__slots__ = ()
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
return _load_openai_cls()(*args, **kwargs)
|
||||
|
||||
def __instancecheck__(self, obj):
|
||||
return isinstance(obj, _load_openai_cls())
|
||||
|
||||
def __repr__(self):
|
||||
return "<lazy openai.OpenAI proxy>"
|
||||
|
||||
|
||||
class _SafeWriter:
|
||||
"""Transparent stdio wrapper that catches OSError/ValueError from broken pipes.
|
||||
|
||||
When hermes-agent runs as a systemd service, Docker container, or headless
|
||||
daemon, the stdout/stderr pipe can become unavailable (idle timeout, buffer
|
||||
exhaustion, socket reset). Any print() call then raises
|
||||
``OSError: [Errno 5] Input/output error``, which can crash agent setup or
|
||||
run_conversation() — especially via double-fault when an except handler
|
||||
also tries to print.
|
||||
|
||||
Additionally, when subagents run in ThreadPoolExecutor threads, the shared
|
||||
stdout handle can close between thread teardown and cleanup, raising
|
||||
``ValueError: I/O operation on closed file`` instead of OSError.
|
||||
|
||||
This wrapper delegates all writes to the underlying stream and silently
|
||||
catches both OSError and ValueError. It is transparent when the wrapped
|
||||
stream is healthy.
|
||||
"""
|
||||
|
||||
__slots__ = ("_inner",)
|
||||
|
||||
def __init__(self, inner):
|
||||
object.__setattr__(self, "_inner", inner)
|
||||
|
||||
def write(self, data):
|
||||
try:
|
||||
return self._inner.write(data)
|
||||
except (OSError, ValueError):
|
||||
return len(data) if isinstance(data, str) else 0
|
||||
|
||||
def flush(self):
|
||||
try:
|
||||
self._inner.flush()
|
||||
except (OSError, ValueError):
|
||||
pass
|
||||
|
||||
def fileno(self):
|
||||
return self._inner.fileno()
|
||||
|
||||
def isatty(self):
|
||||
try:
|
||||
return self._inner.isatty()
|
||||
except (OSError, ValueError):
|
||||
return False
|
||||
|
||||
def __getattr__(self, name):
|
||||
return getattr(self._inner, name)
|
||||
|
||||
|
||||
def _get_proxy_from_env() -> Optional[str]:
|
||||
"""Read proxy URL from environment variables.
|
||||
|
||||
Checks HTTPS_PROXY, HTTP_PROXY, ALL_PROXY (and lowercase variants) in order.
|
||||
Returns the first valid proxy URL found, or None if no proxy is configured.
|
||||
"""
|
||||
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
|
||||
"https_proxy", "http_proxy", "all_proxy"):
|
||||
value = os.environ.get(key, "").strip()
|
||||
if value:
|
||||
return normalize_proxy_url(value)
|
||||
return None
|
||||
|
||||
|
||||
def _get_proxy_for_base_url(base_url: Optional[str]) -> Optional[str]:
|
||||
"""Return an env-configured proxy unless NO_PROXY excludes this base URL."""
|
||||
proxy = _get_proxy_from_env()
|
||||
if not proxy or not base_url:
|
||||
return proxy
|
||||
|
||||
host = base_url_hostname(base_url)
|
||||
if not host:
|
||||
return proxy
|
||||
|
||||
try:
|
||||
if urllib.request.proxy_bypass_environment(host):
|
||||
return None
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return proxy
|
||||
|
||||
|
||||
def _install_safe_stdio() -> None:
|
||||
"""Wrap stdout/stderr so best-effort console output cannot crash the agent."""
|
||||
for stream_name in ("stdout", "stderr"):
|
||||
stream = getattr(sys, stream_name, None)
|
||||
if stream is not None and not isinstance(stream, _SafeWriter):
|
||||
setattr(sys, stream_name, _SafeWriter(stream))
|
||||
|
||||
|
||||
# Module-level proxy instance — drops in for ``openai.OpenAI``. Imported as
|
||||
# ``from agent.process_bootstrap import OpenAI`` (or re-exported via
|
||||
# ``run_agent`` for legacy tests).
|
||||
OpenAI = _OpenAIProxy()
|
||||
|
||||
|
||||
__all__ = [
|
||||
"OpenAI",
|
||||
"_OpenAIProxy",
|
||||
"_load_openai_cls",
|
||||
"_SafeWriter",
|
||||
"_install_safe_stdio",
|
||||
"_get_proxy_from_env",
|
||||
"_get_proxy_for_base_url",
|
||||
]
|
||||
280
agent/stream_diag.py
Normal file
280
agent/stream_diag.py
Normal file
|
|
@ -0,0 +1,280 @@
|
|||
"""Stream diagnostics — per-attempt counters, exception chains, retry logging.
|
||||
|
||||
When a streaming chat-completions request dies mid-response, we want to
|
||||
know why: which Cloudflare edge served the request, which OpenRouter
|
||||
downstream provider answered, how many bytes/chunks we got before the
|
||||
drop, the HTTP status, the underlying httpx error class. These helpers
|
||||
collect that info and emit it both to ``agent.log`` (full detail) and to
|
||||
the user-facing status line (compact).
|
||||
|
||||
All helpers are extracted from :class:`AIAgent` for cleanliness.
|
||||
``run_agent`` keeps thin forwarder methods so existing call sites and
|
||||
tests that patch ``run_agent.<helper>`` keep working.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import time
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Per-attempt stream diagnostic headers. Lowercased; httpx returns
|
||||
# CIMultiDict so case-insensitive lookups already work, but we read .get()
|
||||
# on the dict from agent.log for free-form post-hoc analysis.
|
||||
STREAM_DIAG_HEADERS = (
|
||||
"cf-ray",
|
||||
"cf-cache-status",
|
||||
"x-openrouter-provider",
|
||||
"x-openrouter-model",
|
||||
"x-openrouter-id",
|
||||
"x-request-id",
|
||||
"x-vercel-id",
|
||||
"via",
|
||||
"server",
|
||||
"x-forwarded-for",
|
||||
)
|
||||
|
||||
|
||||
def stream_diag_init() -> Dict[str, Any]:
|
||||
"""Return a fresh per-attempt diagnostic dict.
|
||||
|
||||
Mutated in-place by the streaming functions and read from the retry
|
||||
block when a stream dies. Lives on ``request_client_holder`` so it
|
||||
survives across the closure boundary.
|
||||
"""
|
||||
return {
|
||||
"started_at": time.time(),
|
||||
"first_chunk_at": None,
|
||||
"chunks": 0,
|
||||
"bytes": 0,
|
||||
"headers": {},
|
||||
"http_status": None,
|
||||
}
|
||||
|
||||
|
||||
def stream_diag_capture_response(agent: Any, diag: Dict[str, Any], http_response: Any) -> None:
|
||||
"""Snapshot interesting headers + HTTP status from the live stream.
|
||||
|
||||
Called once at stream open (before iterating chunks) so the metadata
|
||||
survives even if the stream dies before any chunk arrives. Failures
|
||||
are swallowed — diag is best-effort.
|
||||
"""
|
||||
if http_response is None or not isinstance(diag, dict):
|
||||
return
|
||||
try:
|
||||
diag["http_status"] = getattr(http_response, "status_code", None)
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
headers = getattr(http_response, "headers", None) or {}
|
||||
captured: Dict[str, str] = {}
|
||||
# Allow per-agent override of the headers list (back-compat).
|
||||
target_headers = getattr(agent, "_STREAM_DIAG_HEADERS", STREAM_DIAG_HEADERS)
|
||||
for name in target_headers:
|
||||
try:
|
||||
val = headers.get(name)
|
||||
if val:
|
||||
# Truncate single-value to keep log lines bounded.
|
||||
captured[name] = str(val)[:120]
|
||||
except Exception:
|
||||
continue
|
||||
diag["headers"] = captured
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def flatten_exception_chain(error: BaseException) -> str:
|
||||
"""Return a compact ``Outer(msg) <- Inner(msg) <- ...`` rendering.
|
||||
|
||||
OpenAI SDK wraps httpx errors as ``APIConnectionError`` /
|
||||
``APIError`` and only the wrapper's class is visible at the catch
|
||||
site — but the underlying ``RemoteProtocolError`` /
|
||||
``ConnectError`` / ``ReadError`` is what tells us WHY the stream
|
||||
died. Walks ``__cause__`` then ``__context__`` (deduped, max 4
|
||||
deep) to surface the chain in one line.
|
||||
"""
|
||||
seen: List[BaseException] = []
|
||||
link: Optional[BaseException] = error
|
||||
while link is not None and len(seen) < 4:
|
||||
if link in seen:
|
||||
break
|
||||
seen.append(link)
|
||||
nxt = getattr(link, "__cause__", None) or getattr(
|
||||
link, "__context__", None
|
||||
)
|
||||
if nxt is None or nxt is link:
|
||||
break
|
||||
link = nxt
|
||||
parts: List[str] = []
|
||||
for e in seen:
|
||||
msg = str(e).strip().replace("\n", " ")
|
||||
if len(msg) > 140:
|
||||
msg = msg[:140] + "…"
|
||||
parts.append(f"{type(e).__name__}({msg})" if msg else type(e).__name__)
|
||||
return " <- ".join(parts) if parts else type(error).__name__
|
||||
|
||||
|
||||
def log_stream_retry(
|
||||
agent: Any,
|
||||
*,
|
||||
kind: str,
|
||||
error: BaseException,
|
||||
attempt: int,
|
||||
max_attempts: int,
|
||||
mid_tool_call: bool,
|
||||
diag: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
"""Record a transient stream-drop and retry to ``agent.log``.
|
||||
|
||||
Always logs a structured WARNING so users have a breadcrumb regardless
|
||||
of UI verbosity. Subagents in particular benefit because their
|
||||
retries no longer spam the parent's terminal — but the file log keeps
|
||||
full detail (provider, error class, attempt, base_url, subagent_id).
|
||||
|
||||
When *diag* is provided (the per-attempt stream-diagnostic dict from
|
||||
:func:`stream_diag_init`), the WARNING also captures upstream headers
|
||||
(cf-ray, x-openrouter-provider, x-openrouter-id), HTTP status, bytes
|
||||
streamed before the drop, and elapsed time on the dying attempt.
|
||||
These are the breadcrumbs needed to answer "is one CF edge / one
|
||||
downstream provider responsible, or is it random across runs?"
|
||||
"""
|
||||
try:
|
||||
try:
|
||||
_summary = agent._summarize_api_error(error)
|
||||
except Exception:
|
||||
_summary = str(error)
|
||||
if _summary and len(_summary) > 240:
|
||||
_summary = _summary[:240] + "…"
|
||||
|
||||
# Inner-cause chain (httpx errors hide under openai.APIError).
|
||||
try:
|
||||
_chain = flatten_exception_chain(error)
|
||||
except Exception:
|
||||
_chain = type(error).__name__
|
||||
|
||||
# Per-attempt counters and upstream headers.
|
||||
_now = time.time()
|
||||
_bytes = 0
|
||||
_chunks = 0
|
||||
_elapsed = 0.0
|
||||
_ttfb = None
|
||||
_headers_repr = "-"
|
||||
_http_status = "-"
|
||||
if isinstance(diag, dict):
|
||||
try:
|
||||
_bytes = int(diag.get("bytes") or 0)
|
||||
_chunks = int(diag.get("chunks") or 0)
|
||||
_started = float(diag.get("started_at") or _now)
|
||||
_elapsed = max(0.0, _now - _started)
|
||||
_first = diag.get("first_chunk_at")
|
||||
if _first is not None:
|
||||
_ttfb = max(0.0, float(_first) - _started)
|
||||
headers = diag.get("headers") or {}
|
||||
if isinstance(headers, dict) and headers:
|
||||
_headers_repr = " ".join(
|
||||
f"{k}={v}" for k, v in headers.items()
|
||||
)
|
||||
if diag.get("http_status") is not None:
|
||||
_http_status = str(diag.get("http_status"))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
logger.warning(
|
||||
"Stream %s on attempt %s/%s — retrying. "
|
||||
"subagent_id=%s depth=%s provider=%s base_url=%s "
|
||||
"error_type=%s error=%s "
|
||||
"chain=%s "
|
||||
"http_status=%s bytes=%d chunks=%d elapsed=%.2fs ttfb=%s "
|
||||
"upstream=[%s]",
|
||||
kind,
|
||||
attempt,
|
||||
max_attempts,
|
||||
getattr(agent, "_subagent_id", None) or "-",
|
||||
getattr(agent, "_delegate_depth", 0),
|
||||
agent.provider or "-",
|
||||
agent.base_url or "-",
|
||||
type(error).__name__,
|
||||
_summary,
|
||||
_chain,
|
||||
_http_status,
|
||||
_bytes,
|
||||
_chunks,
|
||||
_elapsed,
|
||||
f"{_ttfb:.2f}s" if _ttfb is not None else "-",
|
||||
_headers_repr,
|
||||
extra={"mid_tool_call": mid_tool_call},
|
||||
)
|
||||
except Exception:
|
||||
logger.debug("stream-retry log emit failed", exc_info=True)
|
||||
|
||||
|
||||
def emit_stream_drop(
|
||||
agent: Any,
|
||||
*,
|
||||
error: BaseException,
|
||||
attempt: int,
|
||||
max_attempts: int,
|
||||
mid_tool_call: bool,
|
||||
diag: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
"""Emit a single user-visible line for a stream drop+retry.
|
||||
|
||||
Both top-level agents and subagents announce drops in the UI — the
|
||||
parent prefixes subagent lines with ``[subagent-N]`` via ``log_prefix``
|
||||
so they're easy to attribute. All cases also write a structured
|
||||
WARNING to ``agent.log`` via :func:`log_stream_retry` with the full
|
||||
diagnostic detail (subagent_id, provider, base_url, error_type,
|
||||
cf-ray, x-openrouter-provider, bytes/chunks, elapsed) for post-hoc
|
||||
analysis.
|
||||
|
||||
The user-visible status line is intentionally compact: provider,
|
||||
error class, attempt N/M, plus ``after Xs`` when the stream dropped
|
||||
mid-flight. Full diagnostic detail goes to ``agent.log`` only —
|
||||
``hermes logs --level WARNING | grep "Stream drop"`` to inspect.
|
||||
"""
|
||||
kind = "drop mid tool-call" if mid_tool_call else "drop"
|
||||
log_stream_retry(
|
||||
agent,
|
||||
kind=kind,
|
||||
error=error,
|
||||
attempt=attempt,
|
||||
max_attempts=max_attempts,
|
||||
mid_tool_call=mid_tool_call,
|
||||
diag=diag,
|
||||
)
|
||||
provider = agent.provider or "provider"
|
||||
# Compose a brief "after Xs" suffix when we have timing data — helps
|
||||
# the user distinguish "couldn't connect" (0s) from "died after 30s
|
||||
# of streaming" (likely upstream idle-kill or proxy timeout).
|
||||
_suffix = ""
|
||||
if isinstance(diag, dict):
|
||||
try:
|
||||
started = diag.get("started_at")
|
||||
if started is not None:
|
||||
_suffix = f" after {max(0.0, time.time() - float(started)):.1f}s"
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
agent._emit_status(
|
||||
f"⚠️ {provider} stream {kind} ({type(error).__name__}){_suffix} "
|
||||
f"— reconnecting, retry {attempt}/{max_attempts}"
|
||||
)
|
||||
agent._touch_activity(
|
||||
f"stream retry {attempt}/{max_attempts} "
|
||||
f"after {type(error).__name__}"
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
__all__ = [
|
||||
"STREAM_DIAG_HEADERS",
|
||||
"stream_diag_init",
|
||||
"stream_diag_capture_response",
|
||||
"flatten_exception_chain",
|
||||
"log_stream_retry",
|
||||
"emit_stream_drop",
|
||||
]
|
||||
333
agent/system_prompt.py
Normal file
333
agent/system_prompt.py
Normal file
|
|
@ -0,0 +1,333 @@
|
|||
"""System-prompt assembly for :class:`AIAgent`.
|
||||
|
||||
The agent's system prompt is built once per session and reused across all
|
||||
turns — only context compression triggers a rebuild. This keeps the
|
||||
upstream prefix cache warm. See ``hermes-agent-dev``'s
|
||||
``references/system-prompt-invariant.md`` for the invariants and
|
||||
``references/self-improvement-loop.md`` for how the background-review
|
||||
fork inherits the cached prompt verbatim.
|
||||
|
||||
Three tiers are joined with ``\\n\\n``:
|
||||
|
||||
* ``stable`` — identity (SOUL.md or DEFAULT_AGENT_IDENTITY), tool
|
||||
guidance, computer-use guidance, nous subscription block, tool-use
|
||||
enforcement guidance + per-model operational guidance, skills prompt,
|
||||
alibaba model-name workaround, environment hints, platform hints.
|
||||
* ``context`` — caller-supplied ``system_message`` plus context files
|
||||
(AGENTS.md / .cursorrules / etc.) discovered under ``TERMINAL_CWD``.
|
||||
* ``volatile`` — memory snapshot, USER.md profile, external memory
|
||||
provider block, timestamp/session/model/provider line.
|
||||
|
||||
Pure helpers that read the agent's state. AIAgent keeps thin forwarders.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.prompt_builder import (
|
||||
DEFAULT_AGENT_IDENTITY,
|
||||
GOOGLE_MODEL_OPERATIONAL_GUIDANCE,
|
||||
HERMES_AGENT_HELP_GUIDANCE,
|
||||
KANBAN_GUIDANCE,
|
||||
MEMORY_GUIDANCE,
|
||||
OPENAI_MODEL_EXECUTION_GUIDANCE,
|
||||
PLATFORM_HINTS,
|
||||
SESSION_SEARCH_GUIDANCE,
|
||||
SKILLS_GUIDANCE,
|
||||
TOOL_USE_ENFORCEMENT_GUIDANCE,
|
||||
TOOL_USE_ENFORCEMENT_MODELS,
|
||||
)
|
||||
|
||||
|
||||
def _ra():
|
||||
"""Lazy reference to the ``run_agent`` module.
|
||||
|
||||
Helpers like ``load_soul_md``, ``build_environment_hints``,
|
||||
``build_context_files_prompt``, ``build_nous_subscription_prompt``,
|
||||
``build_skills_system_prompt`` and ``get_toolset_for_tool`` are
|
||||
imported into ``run_agent``'s namespace. Many tests
|
||||
``patch("run_agent.load_soul_md", ...)``; if we imported them
|
||||
directly here those patches would not reach us. Looking them up
|
||||
through ``run_agent`` on every call preserves the patch contract.
|
||||
"""
|
||||
import run_agent
|
||||
return run_agent
|
||||
|
||||
|
||||
def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None) -> Dict[str, str]:
|
||||
"""Assemble the system prompt as three ordered parts.
|
||||
|
||||
Returns a dict with three keys:
|
||||
* ``stable`` — identity, tool guidance, skills prompt,
|
||||
environment hints, platform hints, model-family operational
|
||||
guidance.
|
||||
* ``context`` — context files (AGENTS.md, .cursorrules, etc.)
|
||||
and caller-supplied system_message.
|
||||
* ``volatile`` — memory snapshot, user profile, external
|
||||
memory provider block, timestamp line.
|
||||
|
||||
Joined into a single string by :func:`build_system_prompt` and
|
||||
cached on ``agent._cached_system_prompt`` for the lifetime of the
|
||||
AIAgent. Hermes never re-renders parts of this string mid-
|
||||
session — that's the only way to keep upstream prompt caches
|
||||
warm across turns.
|
||||
"""
|
||||
# Local import to avoid pulling model_tools at module load. Tests
|
||||
# patch ``run_agent.get_toolset_for_tool`` and similar helpers, so
|
||||
# we resolve through ``_ra()`` to honor those patches.
|
||||
_r = _ra()
|
||||
|
||||
# ── Stable tier ────────────────────────────────────────────────
|
||||
stable_parts: List[str] = []
|
||||
|
||||
# Try SOUL.md as primary identity unless the caller explicitly skipped it.
|
||||
# Some execution modes (cron) still want HERMES_HOME persona while keeping
|
||||
# cwd project instructions disabled.
|
||||
_soul_loaded = False
|
||||
if agent.load_soul_identity or not agent.skip_context_files:
|
||||
_soul_content = _r.load_soul_md()
|
||||
if _soul_content:
|
||||
stable_parts.append(_soul_content)
|
||||
_soul_loaded = True
|
||||
|
||||
if not _soul_loaded:
|
||||
# Fallback to hardcoded identity
|
||||
stable_parts.append(DEFAULT_AGENT_IDENTITY)
|
||||
|
||||
# Pointer to the hermes-agent skill + docs for user questions about Hermes itself.
|
||||
stable_parts.append(HERMES_AGENT_HELP_GUIDANCE)
|
||||
|
||||
# Tool-aware behavioral guidance: only inject when the tools are loaded
|
||||
tool_guidance = []
|
||||
if "memory" in agent.valid_tool_names:
|
||||
tool_guidance.append(MEMORY_GUIDANCE)
|
||||
if "session_search" in agent.valid_tool_names:
|
||||
tool_guidance.append(SESSION_SEARCH_GUIDANCE)
|
||||
if "skill_manage" in agent.valid_tool_names:
|
||||
tool_guidance.append(SKILLS_GUIDANCE)
|
||||
# Kanban worker/orchestrator lifecycle — only present when the
|
||||
# dispatcher spawned this process (kanban_show check_fn gates on
|
||||
# HERMES_KANBAN_TASK env var). Normal chat sessions never see
|
||||
# this block.
|
||||
if "kanban_show" in agent.valid_tool_names:
|
||||
tool_guidance.append(KANBAN_GUIDANCE)
|
||||
if tool_guidance:
|
||||
stable_parts.append(" ".join(tool_guidance))
|
||||
|
||||
# Computer-use (macOS) — goes in as its own block rather than being
|
||||
# merged into tool_guidance because the content is multi-paragraph.
|
||||
if "computer_use" in agent.valid_tool_names:
|
||||
from agent.prompt_builder import COMPUTER_USE_GUIDANCE
|
||||
stable_parts.append(COMPUTER_USE_GUIDANCE)
|
||||
|
||||
nous_subscription_prompt = _r.build_nous_subscription_prompt(agent.valid_tool_names)
|
||||
if nous_subscription_prompt:
|
||||
stable_parts.append(nous_subscription_prompt)
|
||||
# Tool-use enforcement: tells the model to actually call tools instead
|
||||
# of describing intended actions. Controlled by config.yaml
|
||||
# agent.tool_use_enforcement:
|
||||
# "auto" (default) — matches TOOL_USE_ENFORCEMENT_MODELS
|
||||
# true — always inject (all models)
|
||||
# false — never inject
|
||||
# list — custom model-name substrings to match
|
||||
if agent.valid_tool_names:
|
||||
_enforce = agent._tool_use_enforcement
|
||||
_inject = False
|
||||
if _enforce is True or (isinstance(_enforce, str) and _enforce.lower() in {"true", "always", "yes", "on"}):
|
||||
_inject = True
|
||||
elif _enforce is False or (isinstance(_enforce, str) and _enforce.lower() in {"false", "never", "no", "off"}):
|
||||
_inject = False
|
||||
elif isinstance(_enforce, list):
|
||||
model_lower = (agent.model or "").lower()
|
||||
_inject = any(p.lower() in model_lower for p in _enforce if isinstance(p, str))
|
||||
else:
|
||||
# "auto" or any unrecognised value — use hardcoded defaults
|
||||
model_lower = (agent.model or "").lower()
|
||||
_inject = any(p in model_lower for p in TOOL_USE_ENFORCEMENT_MODELS)
|
||||
if _inject:
|
||||
stable_parts.append(TOOL_USE_ENFORCEMENT_GUIDANCE)
|
||||
_model_lower = (agent.model or "").lower()
|
||||
# Google model operational guidance (conciseness, absolute
|
||||
# paths, parallel tool calls, verify-before-edit, etc.)
|
||||
if "gemini" in _model_lower or "gemma" in _model_lower:
|
||||
stable_parts.append(GOOGLE_MODEL_OPERATIONAL_GUIDANCE)
|
||||
# OpenAI GPT/Codex execution discipline (tool persistence,
|
||||
# prerequisite checks, verification, anti-hallucination).
|
||||
if "gpt" in _model_lower or "codex" in _model_lower:
|
||||
stable_parts.append(OPENAI_MODEL_EXECUTION_GUIDANCE)
|
||||
|
||||
has_skills_tools = any(name in agent.valid_tool_names for name in ['skills_list', 'skill_view', 'skill_manage'])
|
||||
if has_skills_tools:
|
||||
avail_toolsets = {
|
||||
toolset
|
||||
for toolset in (
|
||||
_r.get_toolset_for_tool(tool_name) for tool_name in agent.valid_tool_names
|
||||
)
|
||||
if toolset
|
||||
}
|
||||
skills_prompt = _r.build_skills_system_prompt(
|
||||
available_tools=agent.valid_tool_names,
|
||||
available_toolsets=avail_toolsets,
|
||||
)
|
||||
else:
|
||||
skills_prompt = ""
|
||||
if skills_prompt:
|
||||
stable_parts.append(skills_prompt)
|
||||
|
||||
# Alibaba Coding Plan API always returns "glm-4.7" as model name regardless
|
||||
# of the requested model. Inject explicit model identity into the system prompt
|
||||
# so the agent can correctly report which model it is (workaround for API bug).
|
||||
# Stable for the lifetime of an agent instance — model and provider are fixed
|
||||
# at construction time.
|
||||
if agent.provider == "alibaba":
|
||||
_model_short = agent.model.split("/")[-1] if "/" in agent.model else agent.model
|
||||
stable_parts.append(
|
||||
f"You are powered by the model named {_model_short}. "
|
||||
f"The exact model ID is {agent.model}. "
|
||||
f"When asked what model you are, always answer based on this information, "
|
||||
f"not on any model name returned by the API."
|
||||
)
|
||||
|
||||
# Environment hints (WSL, Termux, etc.) — tell the agent about the
|
||||
# execution environment so it can translate paths and adapt behavior.
|
||||
# Stable for the lifetime of the process.
|
||||
_env_hints = _r.build_environment_hints()
|
||||
if _env_hints:
|
||||
stable_parts.append(_env_hints)
|
||||
|
||||
platform_key = (agent.platform or "").lower().strip()
|
||||
if platform_key in PLATFORM_HINTS:
|
||||
stable_parts.append(PLATFORM_HINTS[platform_key])
|
||||
elif platform_key:
|
||||
# Check plugin registry for platform-specific LLM guidance
|
||||
try:
|
||||
from gateway.platform_registry import platform_registry
|
||||
_entry = platform_registry.get(platform_key)
|
||||
if _entry and _entry.platform_hint:
|
||||
stable_parts.append(_entry.platform_hint)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# ── Context tier (cwd-dependent, may change between sessions) ─
|
||||
context_parts: List[str] = []
|
||||
|
||||
# Note: ephemeral_system_prompt is NOT included here. It's injected at
|
||||
# API-call time only so it stays out of the cached/stored system prompt.
|
||||
if system_message is not None:
|
||||
context_parts.append(system_message)
|
||||
|
||||
if not agent.skip_context_files:
|
||||
# Use TERMINAL_CWD for context file discovery when set (gateway
|
||||
# mode). The gateway process runs from the hermes-agent install
|
||||
# dir, so os.getcwd() would pick up the repo's AGENTS.md and
|
||||
# other dev files — inflating token usage by ~10k for no benefit.
|
||||
_context_cwd = os.getenv("TERMINAL_CWD") or None
|
||||
context_files_prompt = _r.build_context_files_prompt(
|
||||
cwd=_context_cwd, skip_soul=_soul_loaded)
|
||||
if context_files_prompt:
|
||||
context_parts.append(context_files_prompt)
|
||||
|
||||
# ── Volatile tier (changes per session/turn — never cached) ───
|
||||
volatile_parts: List[str] = []
|
||||
|
||||
if agent._memory_store:
|
||||
if agent._memory_enabled:
|
||||
mem_block = agent._memory_store.format_for_system_prompt("memory")
|
||||
if mem_block:
|
||||
volatile_parts.append(mem_block)
|
||||
# USER.md is always included when enabled.
|
||||
if agent._user_profile_enabled:
|
||||
user_block = agent._memory_store.format_for_system_prompt("user")
|
||||
if user_block:
|
||||
volatile_parts.append(user_block)
|
||||
|
||||
# External memory provider system prompt block (additive to built-in)
|
||||
if agent._memory_manager:
|
||||
try:
|
||||
_ext_mem_block = agent._memory_manager.build_system_prompt()
|
||||
if _ext_mem_block:
|
||||
volatile_parts.append(_ext_mem_block)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
from hermes_time import now as _hermes_now
|
||||
now = _hermes_now()
|
||||
timestamp_line = f"Conversation started: {now.strftime('%A, %B %d, %Y %I:%M %p')}"
|
||||
if agent.pass_session_id and agent.session_id:
|
||||
timestamp_line += f"\nSession ID: {agent.session_id}"
|
||||
if agent.model:
|
||||
timestamp_line += f"\nModel: {agent.model}"
|
||||
if agent.provider:
|
||||
timestamp_line += f"\nProvider: {agent.provider}"
|
||||
volatile_parts.append(timestamp_line)
|
||||
|
||||
return {
|
||||
"stable": "\n\n".join(p.strip() for p in stable_parts if p and p.strip()),
|
||||
"context": "\n\n".join(p.strip() for p in context_parts if p and p.strip()),
|
||||
"volatile": "\n\n".join(p.strip() for p in volatile_parts if p and p.strip()),
|
||||
}
|
||||
|
||||
|
||||
def build_system_prompt(agent: Any, system_message: Optional[str] = None) -> str:
|
||||
"""Assemble the full system prompt from all layers.
|
||||
|
||||
Called once per session (cached on ``agent._cached_system_prompt``) and
|
||||
only rebuilt after context compression events. This ensures the system
|
||||
prompt is stable across all turns in a session, maximizing prefix cache
|
||||
hits.
|
||||
|
||||
Layers are ordered cache-friendly: stable identity/guidance first,
|
||||
then session-stable context files, then per-call volatile content
|
||||
(memory, USER profile, timestamp). The whole string is treated as
|
||||
one cached block — Hermes never rebuilds or reinjects parts of it
|
||||
mid-session, which is the only way to keep upstream prompt caches
|
||||
warm across turns.
|
||||
"""
|
||||
parts = build_system_prompt_parts(agent, system_message=system_message)
|
||||
return "\n\n".join(p for p in (parts["stable"], parts["context"], parts["volatile"]) if p)
|
||||
|
||||
|
||||
def invalidate_system_prompt(agent: Any) -> None:
|
||||
"""Invalidate the cached system prompt, forcing a rebuild on the next turn.
|
||||
|
||||
Called after context compression events. Also reloads memory from disk
|
||||
so the rebuilt prompt captures any writes from this session.
|
||||
"""
|
||||
agent._cached_system_prompt = None
|
||||
if agent._memory_store:
|
||||
agent._memory_store.load_from_disk()
|
||||
|
||||
|
||||
def format_tools_for_system_message(agent: Any) -> str:
|
||||
"""Format tool definitions for the system message in the trajectory format.
|
||||
|
||||
Returns:
|
||||
str: JSON string representation of tool definitions
|
||||
"""
|
||||
if not agent.tools:
|
||||
return "[]"
|
||||
|
||||
# Convert tool definitions to the format expected in trajectories
|
||||
formatted_tools = []
|
||||
for tool in agent.tools:
|
||||
func = tool["function"]
|
||||
formatted_tool = {
|
||||
"name": func["name"],
|
||||
"description": func.get("description", ""),
|
||||
"parameters": func.get("parameters", {}),
|
||||
"required": None # Match the format in the example
|
||||
}
|
||||
formatted_tools.append(formatted_tool)
|
||||
|
||||
return json.dumps(formatted_tools, ensure_ascii=False)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"build_system_prompt_parts",
|
||||
"build_system_prompt",
|
||||
"invalidate_system_prompt",
|
||||
"format_tools_for_system_message",
|
||||
]
|
||||
336
agent/tool_dispatch_helpers.py
Normal file
336
agent/tool_dispatch_helpers.py
Normal file
|
|
@ -0,0 +1,336 @@
|
|||
"""Tool-dispatch helpers — parallelism gating, multimodal envelopes, mutation tracking.
|
||||
|
||||
Pure module-level utilities extracted from ``run_agent.py``:
|
||||
|
||||
* ``_is_destructive_command`` — terminal-command heuristic used to gate
|
||||
parallel batch dispatch.
|
||||
* ``_should_parallelize_tool_batch`` / ``_extract_parallel_scope_path`` /
|
||||
``_paths_overlap`` — the rules engine deciding when a multi-tool batch
|
||||
can run concurrently.
|
||||
* ``_is_multimodal_tool_result`` / ``_multimodal_text_summary`` /
|
||||
``_append_subdir_hint_to_multimodal`` — envelope helpers for the
|
||||
``{"_multimodal": True, "content": [...], "text_summary": ...}`` dict
|
||||
shape returned by tools like ``computer_use``.
|
||||
* ``_extract_file_mutation_targets`` / ``_extract_error_preview`` —
|
||||
per-turn file-mutation verifier inputs.
|
||||
* ``_trajectory_normalize_msg`` — strip image blobs from a message for
|
||||
trajectory saving.
|
||||
|
||||
All helpers are stateless. ``run_agent`` re-exports each name so existing
|
||||
``from run_agent import ...`` imports in tests and other modules keep
|
||||
working unchanged.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.tool_result_classification import (
|
||||
FILE_MUTATING_TOOL_NAMES as _FILE_MUTATING_TOOLS,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Tools that must never run concurrently (interactive / user-facing).
|
||||
# When any of these appear in a batch, we fall back to sequential execution.
|
||||
_NEVER_PARALLEL_TOOLS = frozenset({"clarify"})
|
||||
|
||||
# Read-only tools with no shared mutable session state.
|
||||
_PARALLEL_SAFE_TOOLS = frozenset({
|
||||
"ha_get_state",
|
||||
"ha_list_entities",
|
||||
"ha_list_services",
|
||||
"read_file",
|
||||
"search_files",
|
||||
"session_search",
|
||||
"skill_view",
|
||||
"skills_list",
|
||||
"vision_analyze",
|
||||
"web_extract",
|
||||
"web_search",
|
||||
})
|
||||
|
||||
# File tools can run concurrently when they target independent paths.
|
||||
_PATH_SCOPED_TOOLS = frozenset({"read_file", "write_file", "patch"})
|
||||
|
||||
# Patterns that indicate a terminal command may modify/delete files.
|
||||
_DESTRUCTIVE_PATTERNS = re.compile(
|
||||
r"""(?:^|\s|&&|\|\||;|`)(?:
|
||||
rm\s|rmdir\s|
|
||||
cp\s|install\s|
|
||||
mv\s|
|
||||
sed\s+-i|
|
||||
truncate\s|
|
||||
dd\s|
|
||||
shred\s|
|
||||
git\s+(?:reset|clean|checkout)\s
|
||||
)""",
|
||||
re.VERBOSE,
|
||||
)
|
||||
# Output redirects that overwrite files (> but not >>)
|
||||
_REDIRECT_OVERWRITE = re.compile(r'[^>]>[^>]|^>[^>]')
|
||||
|
||||
|
||||
def _is_destructive_command(cmd: str) -> bool:
|
||||
"""Heuristic: does this terminal command look like it modifies/deletes files?"""
|
||||
if not cmd:
|
||||
return False
|
||||
if _DESTRUCTIVE_PATTERNS.search(cmd):
|
||||
return True
|
||||
if _REDIRECT_OVERWRITE.search(cmd):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _is_mcp_tool_parallel_safe(tool_name: str) -> bool:
|
||||
"""Check if an MCP tool comes from a server with parallel tool calls enabled.
|
||||
|
||||
Lazy-imports from ``tools.mcp_tool`` to avoid circular dependencies.
|
||||
Returns False if the MCP module is not available.
|
||||
"""
|
||||
try:
|
||||
from tools.mcp_tool import is_mcp_tool_parallel_safe
|
||||
return is_mcp_tool_parallel_safe(tool_name)
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _should_parallelize_tool_batch(tool_calls) -> bool:
|
||||
"""Return True when a tool-call batch is safe to run concurrently."""
|
||||
if len(tool_calls) <= 1:
|
||||
return False
|
||||
|
||||
tool_names = [tc.function.name for tc in tool_calls]
|
||||
if any(name in _NEVER_PARALLEL_TOOLS for name in tool_names):
|
||||
return False
|
||||
|
||||
reserved_paths: list[Path] = []
|
||||
for tool_call in tool_calls:
|
||||
tool_name = tool_call.function.name
|
||||
try:
|
||||
function_args = json.loads(tool_call.function.arguments)
|
||||
except Exception:
|
||||
logging.debug(
|
||||
"Could not parse args for %s — defaulting to sequential; raw=%s",
|
||||
tool_name,
|
||||
tool_call.function.arguments[:200],
|
||||
)
|
||||
return False
|
||||
if not isinstance(function_args, dict):
|
||||
logging.debug(
|
||||
"Non-dict args for %s (%s) — defaulting to sequential",
|
||||
tool_name,
|
||||
type(function_args).__name__,
|
||||
)
|
||||
return False
|
||||
|
||||
if tool_name in _PATH_SCOPED_TOOLS:
|
||||
scoped_path = _extract_parallel_scope_path(tool_name, function_args)
|
||||
if scoped_path is None:
|
||||
return False
|
||||
if any(_paths_overlap(scoped_path, existing) for existing in reserved_paths):
|
||||
return False
|
||||
reserved_paths.append(scoped_path)
|
||||
continue
|
||||
|
||||
if tool_name not in _PARALLEL_SAFE_TOOLS:
|
||||
# Check if it's an MCP tool from a server that opted into parallel calls.
|
||||
if not _is_mcp_tool_parallel_safe(tool_name):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def _extract_parallel_scope_path(tool_name: str, function_args: dict) -> Optional[Path]:
|
||||
"""Return the normalized file target for path-scoped tools."""
|
||||
if tool_name not in _PATH_SCOPED_TOOLS:
|
||||
return None
|
||||
|
||||
raw_path = function_args.get("path")
|
||||
if not isinstance(raw_path, str) or not raw_path.strip():
|
||||
return None
|
||||
|
||||
expanded = Path(raw_path).expanduser()
|
||||
if expanded.is_absolute():
|
||||
return Path(os.path.abspath(str(expanded)))
|
||||
|
||||
# Avoid resolve(); the file may not exist yet.
|
||||
return Path(os.path.abspath(str(Path.cwd() / expanded)))
|
||||
|
||||
|
||||
def _paths_overlap(left: Path, right: Path) -> bool:
|
||||
"""Return True when two paths may refer to the same subtree."""
|
||||
left_parts = left.parts
|
||||
right_parts = right.parts
|
||||
if not left_parts or not right_parts:
|
||||
# Empty paths shouldn't reach here (guarded upstream), but be safe.
|
||||
return bool(left_parts) == bool(right_parts) and bool(left_parts)
|
||||
common_len = min(len(left_parts), len(right_parts))
|
||||
return left_parts[:common_len] == right_parts[:common_len]
|
||||
|
||||
|
||||
def _is_multimodal_tool_result(value: Any) -> bool:
|
||||
"""True if the value is a multimodal tool result envelope.
|
||||
|
||||
Multimodal handlers (e.g. tools/computer_use) return a dict with
|
||||
`_multimodal=True`, a `content` key holding OpenAI-style content
|
||||
parts, and an optional `text_summary` for string-only fallbacks.
|
||||
"""
|
||||
return (
|
||||
isinstance(value, dict)
|
||||
and value.get("_multimodal") is True
|
||||
and isinstance(value.get("content"), list)
|
||||
)
|
||||
|
||||
|
||||
def _multimodal_text_summary(value: Any) -> str:
|
||||
"""Extract a plain text view of a multimodal tool result.
|
||||
|
||||
Used wherever downstream code needs a string — logging, previews,
|
||||
persistence size heuristics, fall-back content for providers that
|
||||
don't support multipart tool messages.
|
||||
"""
|
||||
if _is_multimodal_tool_result(value):
|
||||
if value.get("text_summary"):
|
||||
return str(value["text_summary"])
|
||||
parts = []
|
||||
for p in value.get("content") or []:
|
||||
if isinstance(p, dict) and p.get("type") == "text":
|
||||
parts.append(str(p.get("text", "")))
|
||||
if parts:
|
||||
return "\n".join(parts)
|
||||
return "[multimodal tool result]"
|
||||
if isinstance(value, str):
|
||||
return value
|
||||
try:
|
||||
return json.dumps(value, default=str)
|
||||
except Exception:
|
||||
return str(value)
|
||||
|
||||
|
||||
def _append_subdir_hint_to_multimodal(value: Dict[str, Any], hint: str) -> None:
|
||||
"""Mutate a multimodal tool-result envelope to append a subdir hint.
|
||||
|
||||
The hint is added to the first text part so the model sees it; image
|
||||
parts are left untouched. `text_summary` is also updated for
|
||||
string-fallback callers.
|
||||
"""
|
||||
if not _is_multimodal_tool_result(value):
|
||||
return
|
||||
parts = value.get("content") or []
|
||||
for p in parts:
|
||||
if isinstance(p, dict) and p.get("type") == "text":
|
||||
p["text"] = str(p.get("text", "")) + hint
|
||||
break
|
||||
else:
|
||||
parts.insert(0, {"type": "text", "text": hint})
|
||||
value["content"] = parts
|
||||
if isinstance(value.get("text_summary"), str):
|
||||
value["text_summary"] = value["text_summary"] + hint
|
||||
|
||||
|
||||
def _extract_file_mutation_targets(tool_name: str, args: Dict[str, Any]) -> List[str]:
|
||||
"""Return the file paths a ``write_file`` or ``patch`` call is targeting.
|
||||
|
||||
For ``write_file`` and ``patch`` in replace mode this is just ``args["path"]``.
|
||||
For ``patch`` in V4A patch mode we parse the patch content for
|
||||
``*** Update File:`` / ``*** Add File:`` / ``*** Delete File:`` headers so
|
||||
the verifier can track each file in a multi-file patch separately.
|
||||
"""
|
||||
if tool_name not in _FILE_MUTATING_TOOLS:
|
||||
return []
|
||||
if tool_name == "write_file":
|
||||
p = args.get("path")
|
||||
return [str(p)] if p else []
|
||||
# tool_name == "patch"
|
||||
mode = args.get("mode") or "replace"
|
||||
if mode == "replace":
|
||||
p = args.get("path")
|
||||
return [str(p)] if p else []
|
||||
if mode == "patch":
|
||||
body = args.get("patch") or ""
|
||||
if not isinstance(body, str) or not body:
|
||||
return []
|
||||
paths: List[str] = []
|
||||
for _m in re.finditer(
|
||||
r'^\*\*\*\s+(?:Update|Add|Delete)\s+File:\s*(.+)$',
|
||||
body,
|
||||
re.MULTILINE,
|
||||
):
|
||||
p = _m.group(1).strip()
|
||||
if p:
|
||||
paths.append(p)
|
||||
return paths
|
||||
return []
|
||||
|
||||
|
||||
def _extract_error_preview(result: Any, max_len: int = 180) -> str:
|
||||
"""Pull a one-line error summary out of a tool result for footer display."""
|
||||
text = _multimodal_text_summary(result) if result is not None else ""
|
||||
if not isinstance(text, str):
|
||||
try:
|
||||
text = str(text)
|
||||
except Exception:
|
||||
return ""
|
||||
# Try to parse JSON and pull the ``error`` field — tool handlers return
|
||||
# ``{"success": false, "error": "..."}``; raw string wins if parse fails.
|
||||
stripped = text.strip()
|
||||
if stripped.startswith("{"):
|
||||
try:
|
||||
data = json.loads(stripped)
|
||||
if isinstance(data, dict) and isinstance(data.get("error"), str):
|
||||
text = data["error"]
|
||||
except Exception:
|
||||
pass
|
||||
# Collapse whitespace, trim to max_len.
|
||||
text = " ".join(text.split())
|
||||
if len(text) > max_len:
|
||||
text = text[: max_len - 1] + "…"
|
||||
return text
|
||||
|
||||
|
||||
def _trajectory_normalize_msg(msg: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Strip image blobs from a message for trajectory saving.
|
||||
|
||||
Returns a shallow copy with multimodal tool results replaced by their
|
||||
text_summary, and image parts in content lists replaced by
|
||||
`[screenshot]` placeholders. Keeps the message schema otherwise intact.
|
||||
"""
|
||||
if not isinstance(msg, dict):
|
||||
return msg
|
||||
content = msg.get("content")
|
||||
if _is_multimodal_tool_result(content):
|
||||
return {**msg, "content": _multimodal_text_summary(content)}
|
||||
if isinstance(content, list):
|
||||
cleaned = []
|
||||
for p in content:
|
||||
if isinstance(p, dict) and p.get("type") in {"image", "image_url", "input_image"}:
|
||||
cleaned.append({"type": "text", "text": "[screenshot]"})
|
||||
else:
|
||||
cleaned.append(p)
|
||||
return {**msg, "content": cleaned}
|
||||
return msg
|
||||
|
||||
|
||||
__all__ = [
|
||||
"_NEVER_PARALLEL_TOOLS",
|
||||
"_PARALLEL_SAFE_TOOLS",
|
||||
"_PATH_SCOPED_TOOLS",
|
||||
"_DESTRUCTIVE_PATTERNS",
|
||||
"_REDIRECT_OVERWRITE",
|
||||
"_is_destructive_command",
|
||||
"_should_parallelize_tool_batch",
|
||||
"_extract_parallel_scope_path",
|
||||
"_paths_overlap",
|
||||
"_is_multimodal_tool_result",
|
||||
"_multimodal_text_summary",
|
||||
"_append_subdir_hint_to_multimodal",
|
||||
"_extract_file_mutation_targets",
|
||||
"_extract_error_preview",
|
||||
"_trajectory_normalize_msg",
|
||||
]
|
||||
920
agent/tool_executor.py
Normal file
920
agent/tool_executor.py
Normal file
|
|
@ -0,0 +1,920 @@
|
|||
"""Tool-call execution — sequential and concurrent dispatch.
|
||||
|
||||
Both AIAgent methods (``_execute_tool_calls_sequential`` and
|
||||
``_execute_tool_calls_concurrent``) live here as module-level
|
||||
functions that take the parent ``AIAgent`` as their first argument.
|
||||
|
||||
``run_agent`` keeps thin wrappers so existing call sites work; tests
|
||||
that patch ``run_agent._set_interrupt`` are honored because the
|
||||
extracted functions reach back through the ``run_agent`` module via
|
||||
``_ra()`` for that symbol.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import concurrent.futures
|
||||
import contextvars
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import threading
|
||||
import time
|
||||
from typing import Any, Optional
|
||||
|
||||
from agent.display import (
|
||||
KawaiiSpinner,
|
||||
build_tool_preview as _build_tool_preview,
|
||||
get_cute_tool_message as _get_cute_tool_message_impl,
|
||||
get_tool_emoji as _get_tool_emoji,
|
||||
_detect_tool_failure,
|
||||
)
|
||||
from agent.tool_guardrails import ToolGuardrailDecision
|
||||
from agent.tool_dispatch_helpers import (
|
||||
_is_destructive_command,
|
||||
_is_multimodal_tool_result,
|
||||
_multimodal_text_summary,
|
||||
_append_subdir_hint_to_multimodal,
|
||||
)
|
||||
from tools.terminal_tool import (
|
||||
_get_approval_callback,
|
||||
_get_sudo_password_callback,
|
||||
set_approval_callback as _set_approval_callback,
|
||||
set_sudo_password_callback as _set_sudo_password_callback,
|
||||
get_active_env,
|
||||
)
|
||||
from tools.tool_result_storage import (
|
||||
maybe_persist_tool_result,
|
||||
enforce_turn_budget,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Maximum number of concurrent worker threads for parallel tool execution.
|
||||
# Mirrors the constant in ``run_agent`` for tests/imports that look here.
|
||||
_MAX_TOOL_WORKERS = 8
|
||||
|
||||
|
||||
def _ra():
|
||||
"""Lazy reference to ``run_agent`` so patches like ``run_agent._set_interrupt`` work."""
|
||||
import run_agent
|
||||
return run_agent
|
||||
|
||||
|
||||
def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
|
||||
"""Execute multiple tool calls concurrently using a thread pool.
|
||||
|
||||
Results are collected in the original tool-call order and appended to
|
||||
messages so the API sees them in the expected sequence.
|
||||
"""
|
||||
tool_calls = assistant_message.tool_calls
|
||||
num_tools = len(tool_calls)
|
||||
|
||||
# ── Pre-flight: interrupt check ──────────────────────────────────
|
||||
if agent._interrupt_requested:
|
||||
print(f"{agent.log_prefix}⚡ Interrupt: skipping {num_tools} tool call(s)")
|
||||
for tc in tool_calls:
|
||||
messages.append({
|
||||
"role": "tool",
|
||||
"name": tc.function.name,
|
||||
"content": f"[Tool execution cancelled — {tc.function.name} was skipped due to user interrupt]",
|
||||
"tool_call_id": tc.id,
|
||||
})
|
||||
return
|
||||
|
||||
# ── Parse args + pre-execution bookkeeping ───────────────────────
|
||||
parsed_calls = [] # list of (tool_call, function_name, function_args)
|
||||
for tool_call in tool_calls:
|
||||
function_name = tool_call.function.name
|
||||
|
||||
# Reset nudge counters
|
||||
if function_name == "memory":
|
||||
agent._turns_since_memory = 0
|
||||
elif function_name == "skill_manage":
|
||||
agent._iters_since_skill = 0
|
||||
|
||||
try:
|
||||
function_args = json.loads(tool_call.function.arguments)
|
||||
except json.JSONDecodeError:
|
||||
function_args = {}
|
||||
if not isinstance(function_args, dict):
|
||||
function_args = {}
|
||||
|
||||
# Checkpoint for file-mutating tools
|
||||
if function_name in {"write_file", "patch"} and agent._checkpoint_mgr.enabled:
|
||||
try:
|
||||
file_path = function_args.get("path", "")
|
||||
if file_path:
|
||||
work_dir = agent._checkpoint_mgr.get_working_dir_for_path(file_path)
|
||||
agent._checkpoint_mgr.ensure_checkpoint(work_dir, f"before {function_name}")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Checkpoint before destructive terminal commands
|
||||
if function_name == "terminal" and agent._checkpoint_mgr.enabled:
|
||||
try:
|
||||
cmd = function_args.get("command", "")
|
||||
if _is_destructive_command(cmd):
|
||||
cwd = function_args.get("workdir") or os.getenv("TERMINAL_CWD", os.getcwd())
|
||||
agent._checkpoint_mgr.ensure_checkpoint(
|
||||
cwd, f"before terminal: {cmd[:60]}"
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
block_result = None
|
||||
blocked_by_guardrail = False
|
||||
try:
|
||||
from hermes_cli.plugins import get_pre_tool_call_block_message
|
||||
block_message = get_pre_tool_call_block_message(
|
||||
function_name, function_args, task_id=effective_task_id or "",
|
||||
)
|
||||
except Exception:
|
||||
block_message = None
|
||||
|
||||
if block_message is not None:
|
||||
block_result = json.dumps({"error": block_message}, ensure_ascii=False)
|
||||
else:
|
||||
guardrail_decision = agent._tool_guardrails.before_call(function_name, function_args)
|
||||
if not guardrail_decision.allows_execution:
|
||||
block_result = agent._guardrail_block_result(guardrail_decision)
|
||||
blocked_by_guardrail = True
|
||||
|
||||
parsed_calls.append((tool_call, function_name, function_args, block_result, blocked_by_guardrail))
|
||||
|
||||
# ── Logging / callbacks ──────────────────────────────────────────
|
||||
tool_names_str = ", ".join(name for _, name, _, _, _ in parsed_calls)
|
||||
if not agent.quiet_mode:
|
||||
print(f" ⚡ Concurrent: {num_tools} tool calls — {tool_names_str}")
|
||||
for i, (tc, name, args, block_result, blocked_by_guardrail) in enumerate(parsed_calls, 1):
|
||||
args_str = json.dumps(args, ensure_ascii=False)
|
||||
if agent.verbose_logging:
|
||||
print(f" 📞 Tool {i}: {name}({list(args.keys())})")
|
||||
print(agent._wrap_verbose("Args: ", json.dumps(args, indent=2, ensure_ascii=False)))
|
||||
else:
|
||||
args_preview = args_str[:agent.log_prefix_chars] + "..." if len(args_str) > agent.log_prefix_chars else args_str
|
||||
print(f" 📞 Tool {i}: {name}({list(args.keys())}) - {args_preview}")
|
||||
|
||||
for tc, name, args, block_result, blocked_by_guardrail in parsed_calls:
|
||||
if block_result is not None:
|
||||
continue
|
||||
if agent.tool_progress_callback:
|
||||
try:
|
||||
preview = _build_tool_preview(name, args)
|
||||
agent.tool_progress_callback("tool.started", name, preview, args)
|
||||
except Exception as cb_err:
|
||||
logging.debug(f"Tool progress callback error: {cb_err}")
|
||||
|
||||
for tc, name, args, block_result, blocked_by_guardrail in parsed_calls:
|
||||
if block_result is not None:
|
||||
continue
|
||||
if agent.tool_start_callback:
|
||||
try:
|
||||
agent.tool_start_callback(tc.id, name, args)
|
||||
except Exception as cb_err:
|
||||
logging.debug(f"Tool start callback error: {cb_err}")
|
||||
|
||||
# ── Concurrent execution ─────────────────────────────────────────
|
||||
# Each slot holds (function_name, function_args, function_result, duration, error_flag, blocked_flag)
|
||||
results = [None] * num_tools
|
||||
for i, (tc, name, args, block_result, blocked_by_guardrail) in enumerate(parsed_calls):
|
||||
if block_result is not None:
|
||||
results[i] = (name, args, block_result, 0.0, True, True)
|
||||
|
||||
# Touch activity before launching workers so the gateway knows
|
||||
# we're executing tools (not stuck).
|
||||
agent._current_tool = tool_names_str
|
||||
agent._touch_activity(f"executing {num_tools} tools concurrently: {tool_names_str}")
|
||||
|
||||
# Capture CLI callbacks from the agent thread so worker threads can
|
||||
# register them locally. Without this, _get_approval_callback() in
|
||||
# terminal_tool returns None in ThreadPoolExecutor workers, causing
|
||||
# the dangerous-command prompt to fall back to input() — which
|
||||
# deadlocks against prompt_toolkit's raw terminal mode (#13617).
|
||||
_parent_approval_cb = _get_approval_callback()
|
||||
_parent_sudo_cb = _get_sudo_password_callback()
|
||||
|
||||
def _run_tool(index, tool_call, function_name, function_args):
|
||||
"""Worker function executed in a thread."""
|
||||
# Register this worker tid so the agent can fan out an interrupt
|
||||
# to it — see AIAgent.interrupt(). Must happen first thing, and
|
||||
# must be paired with discard + clear in the finally block.
|
||||
_worker_tid = threading.current_thread().ident
|
||||
with agent._tool_worker_threads_lock:
|
||||
agent._tool_worker_threads.add(_worker_tid)
|
||||
# Race: if the agent was interrupted between fan-out (which
|
||||
# snapshotted an empty/earlier set) and our registration, apply
|
||||
# the interrupt to our own tid now so is_interrupted() inside
|
||||
# the tool returns True on the next poll.
|
||||
if agent._interrupt_requested:
|
||||
try:
|
||||
_ra()._set_interrupt(True, _worker_tid)
|
||||
except Exception:
|
||||
pass
|
||||
# Set the activity callback on THIS worker thread so
|
||||
# _wait_for_process (terminal commands) can fire heartbeats.
|
||||
# The callback is thread-local; the main thread's callback
|
||||
# is invisible to worker threads.
|
||||
try:
|
||||
from tools.environments.base import set_activity_callback
|
||||
set_activity_callback(agent._touch_activity)
|
||||
except Exception:
|
||||
pass
|
||||
# Propagate approval/sudo callbacks to this worker thread.
|
||||
# Mirrors cli.py run_agent() pattern (GHSA-qg5c-hvr5-hjgr).
|
||||
if _parent_approval_cb is not None:
|
||||
try:
|
||||
_set_approval_callback(_parent_approval_cb)
|
||||
except Exception:
|
||||
pass
|
||||
if _parent_sudo_cb is not None:
|
||||
try:
|
||||
_set_sudo_password_callback(_parent_sudo_cb)
|
||||
except Exception:
|
||||
pass
|
||||
start = time.time()
|
||||
try:
|
||||
result = agent._invoke_tool(
|
||||
function_name,
|
||||
function_args,
|
||||
effective_task_id,
|
||||
tool_call.id,
|
||||
messages=messages,
|
||||
pre_tool_block_checked=True,
|
||||
)
|
||||
except Exception as tool_error:
|
||||
result = f"Error executing tool '{function_name}': {tool_error}"
|
||||
logger.error("_invoke_tool raised for %s: %s", function_name, tool_error, exc_info=True)
|
||||
duration = time.time() - start
|
||||
is_error, _ = _detect_tool_failure(function_name, result)
|
||||
if is_error:
|
||||
logger.info("tool %s failed (%.2fs): %s", function_name, duration, result[:200])
|
||||
else:
|
||||
logger.info("tool %s completed (%.2fs, %d chars)", function_name, duration, len(result))
|
||||
results[index] = (function_name, function_args, result, duration, is_error, False)
|
||||
# Tear down worker-tid tracking. Clear any interrupt bit we may
|
||||
# have set so the next task scheduled onto this recycled tid
|
||||
# starts with a clean slate.
|
||||
with agent._tool_worker_threads_lock:
|
||||
agent._tool_worker_threads.discard(_worker_tid)
|
||||
try:
|
||||
_ra()._set_interrupt(False, _worker_tid)
|
||||
except Exception:
|
||||
pass
|
||||
# Clear thread-local callbacks so a recycled worker thread
|
||||
# doesn't hold stale references to a disposed CLI instance.
|
||||
try:
|
||||
_set_approval_callback(None)
|
||||
_set_sudo_password_callback(None)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Start spinner for CLI mode (skip when TUI handles tool progress)
|
||||
spinner = None
|
||||
if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
|
||||
face = random.choice(KawaiiSpinner.get_waiting_faces())
|
||||
spinner = KawaiiSpinner(f"{face} ⚡ running {num_tools} tools concurrently", spinner_type='dots', print_fn=agent._print_fn)
|
||||
spinner.start()
|
||||
|
||||
try:
|
||||
runnable_calls = [
|
||||
(i, tc, name, args)
|
||||
for i, (tc, name, args, block_result, blocked_by_guardrail) in enumerate(parsed_calls)
|
||||
if block_result is None
|
||||
]
|
||||
futures = []
|
||||
if runnable_calls:
|
||||
max_workers = min(len(runnable_calls), _MAX_TOOL_WORKERS)
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
|
||||
for i, tc, name, args in runnable_calls:
|
||||
# Propagate ContextVars (e.g. _approval_session_key); mirrors asyncio.to_thread.
|
||||
ctx = contextvars.copy_context()
|
||||
f = executor.submit(ctx.run, _run_tool, i, tc, name, args)
|
||||
futures.append(f)
|
||||
|
||||
# Wait for all to complete with periodic heartbeats so the
|
||||
# gateway's inactivity monitor doesn't kill us during long
|
||||
# concurrent tool batches. Also check for user interrupts
|
||||
# so we don't block indefinitely when the user sends /stop
|
||||
# or a new message during concurrent tool execution.
|
||||
_conc_start = time.time()
|
||||
_interrupt_logged = False
|
||||
while True:
|
||||
done, not_done = concurrent.futures.wait(
|
||||
futures, timeout=5.0,
|
||||
)
|
||||
if not not_done:
|
||||
break
|
||||
|
||||
# Check for interrupt — the per-thread interrupt signal
|
||||
# already causes individual tools (terminal, execute_code)
|
||||
# to abort, but tools without interrupt checks (web_search,
|
||||
# read_file) will run to completion. Cancel any futures
|
||||
# that haven't started yet so we don't block on them.
|
||||
if agent._interrupt_requested:
|
||||
if not _interrupt_logged:
|
||||
_interrupt_logged = True
|
||||
agent._vprint(
|
||||
f"{agent.log_prefix}⚡ Interrupt: cancelling "
|
||||
f"{len(not_done)} pending concurrent tool(s)",
|
||||
force=True,
|
||||
)
|
||||
for f in not_done:
|
||||
f.cancel()
|
||||
# Give already-running tools a moment to notice the
|
||||
# per-thread interrupt signal and exit gracefully.
|
||||
concurrent.futures.wait(not_done, timeout=3.0)
|
||||
break
|
||||
|
||||
_conc_elapsed = int(time.time() - _conc_start)
|
||||
# Heartbeat every ~30s (6 × 5s poll intervals)
|
||||
if _conc_elapsed > 0 and _conc_elapsed % 30 < 6:
|
||||
_still_running = [
|
||||
parsed_calls[futures.index(f)][1]
|
||||
for f in not_done
|
||||
if f in futures
|
||||
]
|
||||
agent._touch_activity(
|
||||
f"concurrent tools running ({_conc_elapsed}s, "
|
||||
f"{len(not_done)} remaining: {', '.join(_still_running[:3])})"
|
||||
)
|
||||
finally:
|
||||
if spinner:
|
||||
# Build a summary message for the spinner stop
|
||||
completed = sum(1 for r in results if r is not None)
|
||||
total_dur = sum(r[3] for r in results if r is not None)
|
||||
spinner.stop(f"⚡ {completed}/{num_tools} tools completed in {total_dur:.1f}s total")
|
||||
|
||||
# ── Post-execution: display per-tool results ─────────────────────
|
||||
for i, (tc, name, args, block_result, blocked_by_guardrail) in enumerate(parsed_calls):
|
||||
r = results[i]
|
||||
blocked = False
|
||||
if r is None:
|
||||
# Tool was cancelled (interrupt) or thread didn't return
|
||||
if agent._interrupt_requested:
|
||||
function_result = f"[Tool execution cancelled — {name} was skipped due to user interrupt]"
|
||||
else:
|
||||
function_result = f"Error executing tool '{name}': thread did not return a result"
|
||||
tool_duration = 0.0
|
||||
else:
|
||||
function_name, function_args, function_result, tool_duration, is_error, blocked = r
|
||||
|
||||
if not blocked:
|
||||
function_result = agent._append_guardrail_observation(
|
||||
function_name,
|
||||
function_args,
|
||||
function_result,
|
||||
failed=is_error,
|
||||
)
|
||||
|
||||
if is_error:
|
||||
_err_text = _multimodal_text_summary(function_result)
|
||||
result_preview = _err_text[:200] if len(_err_text) > 200 else _err_text
|
||||
logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview)
|
||||
|
||||
# Track file-mutation outcome for the turn-end verifier.
|
||||
# `blocked` calls never actually ran — don't let a guardrail
|
||||
# block count as either a failure or a success.
|
||||
if not blocked:
|
||||
try:
|
||||
agent._record_file_mutation_result(
|
||||
function_name, function_args, function_result, is_error,
|
||||
)
|
||||
except Exception as _ver_err:
|
||||
logging.debug("file-mutation verifier record failed: %s", _ver_err)
|
||||
|
||||
if not blocked and agent.tool_progress_callback:
|
||||
try:
|
||||
agent.tool_progress_callback(
|
||||
"tool.completed", function_name, None, None,
|
||||
duration=tool_duration, is_error=is_error,
|
||||
)
|
||||
except Exception as cb_err:
|
||||
logging.debug(f"Tool progress callback error: {cb_err}")
|
||||
|
||||
if agent.verbose_logging:
|
||||
logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s")
|
||||
logging.debug(f"Tool result ({len(function_result)} chars): {function_result}")
|
||||
|
||||
# Print cute message per tool
|
||||
if agent._should_emit_quiet_tool_messages():
|
||||
cute_msg = _get_cute_tool_message_impl(name, args, tool_duration, result=function_result)
|
||||
agent._safe_print(f" {cute_msg}")
|
||||
elif not agent.quiet_mode:
|
||||
_preview_str = _multimodal_text_summary(function_result)
|
||||
if agent.verbose_logging:
|
||||
print(f" ✅ Tool {i+1} completed in {tool_duration:.2f}s")
|
||||
print(agent._wrap_verbose("Result: ", _preview_str))
|
||||
else:
|
||||
response_preview = _preview_str[:agent.log_prefix_chars] + "..." if len(_preview_str) > agent.log_prefix_chars else _preview_str
|
||||
print(f" ✅ Tool {i+1} completed in {tool_duration:.2f}s - {response_preview}")
|
||||
|
||||
agent._current_tool = None
|
||||
agent._touch_activity(f"tool completed: {name} ({tool_duration:.1f}s)")
|
||||
|
||||
if not blocked and agent.tool_complete_callback:
|
||||
try:
|
||||
agent.tool_complete_callback(tc.id, name, args, function_result)
|
||||
except Exception as cb_err:
|
||||
logging.debug(f"Tool complete callback error: {cb_err}")
|
||||
|
||||
function_result = maybe_persist_tool_result(
|
||||
content=function_result,
|
||||
tool_name=name,
|
||||
tool_use_id=tc.id,
|
||||
env=get_active_env(effective_task_id),
|
||||
) if not _is_multimodal_tool_result(function_result) else function_result
|
||||
|
||||
subdir_hints = agent._subdirectory_hints.check_tool_call(name, args)
|
||||
if subdir_hints:
|
||||
if _is_multimodal_tool_result(function_result):
|
||||
# Append the hint to the text summary part so the model
|
||||
# still sees it; don't touch the image blocks.
|
||||
_append_subdir_hint_to_multimodal(function_result, subdir_hints)
|
||||
else:
|
||||
function_result += subdir_hints
|
||||
|
||||
# Unwrap _multimodal dicts to an OpenAI-style content list so any
|
||||
# vision-capable provider receives [{type:text},{type:image_url}]
|
||||
# rather than a raw Python dict. The Anthropic adapter already
|
||||
# accepts content lists; vision-capable OpenAI-compatible servers
|
||||
# (mlx-vlm, GPT-4o, …) accept image_url in tool messages natively.
|
||||
# Text-only servers get a string-safe fallback here so a rejected
|
||||
# image tool result never poisons canonical session history.
|
||||
# String results pass through unchanged.
|
||||
_tool_content = agent._tool_result_content_for_active_model(name, function_result)
|
||||
tool_msg = {
|
||||
"role": "tool",
|
||||
"name": name,
|
||||
"content": _tool_content,
|
||||
"tool_call_id": tc.id,
|
||||
}
|
||||
messages.append(tool_msg)
|
||||
|
||||
# ── Per-tool /steer drain ───────────────────────────────────
|
||||
# Same as the sequential path: drain between each collected
|
||||
# result so the steer lands as early as possible.
|
||||
agent._apply_pending_steer_to_tool_results(messages, 1)
|
||||
|
||||
# ── Per-turn aggregate budget enforcement ─────────────────────────
|
||||
num_tools = len(parsed_calls)
|
||||
if num_tools > 0:
|
||||
turn_tool_msgs = messages[-num_tools:]
|
||||
enforce_turn_budget(turn_tool_msgs, env=get_active_env(effective_task_id))
|
||||
|
||||
# ── /steer injection ──────────────────────────────────────────────
|
||||
# Append any pending user steer text to the last tool result so the
|
||||
# agent sees it on its next iteration. Runs AFTER budget enforcement
|
||||
# so the steer marker is never truncated. See steer() for details.
|
||||
if num_tools > 0:
|
||||
agent._apply_pending_steer_to_tool_results(messages, num_tools)
|
||||
|
||||
|
||||
|
||||
def execute_tool_calls_sequential(agent, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
|
||||
"""Execute tool calls sequentially (original behavior). Used for single calls or interactive tools."""
|
||||
for i, tool_call in enumerate(assistant_message.tool_calls, 1):
|
||||
# SAFETY: check interrupt BEFORE starting each tool.
|
||||
# If the user sent "stop" during a previous tool's execution,
|
||||
# do NOT start any more tools -- skip them all immediately.
|
||||
if agent._interrupt_requested:
|
||||
remaining_calls = assistant_message.tool_calls[i-1:]
|
||||
if remaining_calls:
|
||||
agent._vprint(f"{agent.log_prefix}⚡ Interrupt: skipping {len(remaining_calls)} tool call(s)", force=True)
|
||||
for skipped_tc in remaining_calls:
|
||||
skipped_name = skipped_tc.function.name
|
||||
skip_msg = {
|
||||
"role": "tool",
|
||||
"name": skipped_name,
|
||||
"content": f"[Tool execution cancelled — {skipped_name} was skipped due to user interrupt]",
|
||||
"tool_call_id": skipped_tc.id,
|
||||
}
|
||||
messages.append(skip_msg)
|
||||
break
|
||||
|
||||
function_name = tool_call.function.name
|
||||
|
||||
try:
|
||||
function_args = json.loads(tool_call.function.arguments)
|
||||
except json.JSONDecodeError as e:
|
||||
logging.warning(f"Unexpected JSON error after validation: {e}")
|
||||
function_args = {}
|
||||
if not isinstance(function_args, dict):
|
||||
function_args = {}
|
||||
|
||||
# Check plugin hooks for a block directive before executing.
|
||||
_block_msg: Optional[str] = None
|
||||
try:
|
||||
from hermes_cli.plugins import get_pre_tool_call_block_message
|
||||
_block_msg = get_pre_tool_call_block_message(
|
||||
function_name, function_args, task_id=effective_task_id or "",
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
_guardrail_block_decision: ToolGuardrailDecision | None = None
|
||||
if _block_msg is None:
|
||||
guardrail_decision = agent._tool_guardrails.before_call(function_name, function_args)
|
||||
if not guardrail_decision.allows_execution:
|
||||
_guardrail_block_decision = guardrail_decision
|
||||
|
||||
_execution_blocked = _block_msg is not None or _guardrail_block_decision is not None
|
||||
|
||||
if _execution_blocked:
|
||||
# Tool blocked by plugin or guardrail policy — skip counters,
|
||||
# callbacks, checkpointing, activity mutation, and real execution.
|
||||
pass
|
||||
# Reset nudge counters when the relevant tool is actually used
|
||||
elif function_name == "memory":
|
||||
agent._turns_since_memory = 0
|
||||
elif function_name == "skill_manage":
|
||||
agent._iters_since_skill = 0
|
||||
|
||||
if not agent.quiet_mode:
|
||||
args_str = json.dumps(function_args, ensure_ascii=False)
|
||||
if agent.verbose_logging:
|
||||
print(f" 📞 Tool {i}: {function_name}({list(function_args.keys())})")
|
||||
print(agent._wrap_verbose("Args: ", json.dumps(function_args, indent=2, ensure_ascii=False)))
|
||||
else:
|
||||
args_preview = args_str[:agent.log_prefix_chars] + "..." if len(args_str) > agent.log_prefix_chars else args_str
|
||||
print(f" 📞 Tool {i}: {function_name}({list(function_args.keys())}) - {args_preview}")
|
||||
|
||||
if not _execution_blocked:
|
||||
agent._current_tool = function_name
|
||||
agent._touch_activity(f"executing tool: {function_name}")
|
||||
|
||||
# Set activity callback for long-running tool execution (terminal
|
||||
# commands, etc.) so the gateway's inactivity monitor doesn't kill
|
||||
# the agent while a command is running.
|
||||
if not _execution_blocked:
|
||||
try:
|
||||
from tools.environments.base import set_activity_callback
|
||||
set_activity_callback(agent._touch_activity)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if not _execution_blocked and agent.tool_progress_callback:
|
||||
try:
|
||||
preview = _build_tool_preview(function_name, function_args)
|
||||
agent.tool_progress_callback("tool.started", function_name, preview, function_args)
|
||||
except Exception as cb_err:
|
||||
logging.debug(f"Tool progress callback error: {cb_err}")
|
||||
|
||||
if not _execution_blocked and agent.tool_start_callback:
|
||||
try:
|
||||
agent.tool_start_callback(tool_call.id, function_name, function_args)
|
||||
except Exception as cb_err:
|
||||
logging.debug(f"Tool start callback error: {cb_err}")
|
||||
|
||||
# Checkpoint: snapshot working dir before file-mutating tools
|
||||
if not _execution_blocked and function_name in {"write_file", "patch"} and agent._checkpoint_mgr.enabled:
|
||||
try:
|
||||
file_path = function_args.get("path", "")
|
||||
if file_path:
|
||||
work_dir = agent._checkpoint_mgr.get_working_dir_for_path(file_path)
|
||||
agent._checkpoint_mgr.ensure_checkpoint(
|
||||
work_dir, f"before {function_name}"
|
||||
)
|
||||
except Exception:
|
||||
pass # never block tool execution
|
||||
|
||||
# Checkpoint before destructive terminal commands
|
||||
if not _execution_blocked and function_name == "terminal" and agent._checkpoint_mgr.enabled:
|
||||
try:
|
||||
cmd = function_args.get("command", "")
|
||||
if _is_destructive_command(cmd):
|
||||
cwd = function_args.get("workdir") or os.getenv("TERMINAL_CWD", os.getcwd())
|
||||
agent._checkpoint_mgr.ensure_checkpoint(
|
||||
cwd, f"before terminal: {cmd[:60]}"
|
||||
)
|
||||
except Exception:
|
||||
pass # never block tool execution
|
||||
|
||||
tool_start_time = time.time()
|
||||
|
||||
if _block_msg is not None:
|
||||
# Tool blocked by plugin policy — return error without executing.
|
||||
function_result = json.dumps({"error": _block_msg}, ensure_ascii=False)
|
||||
tool_duration = 0.0
|
||||
elif _guardrail_block_decision is not None:
|
||||
# Tool blocked by tool-loop guardrail — synthesize exactly one
|
||||
# tool result for the original tool_call_id without executing.
|
||||
function_result = agent._guardrail_block_result(_guardrail_block_decision)
|
||||
tool_duration = 0.0
|
||||
elif function_name == "todo":
|
||||
from tools.todo_tool import todo_tool as _todo_tool
|
||||
function_result = _todo_tool(
|
||||
todos=function_args.get("todos"),
|
||||
merge=function_args.get("merge", False),
|
||||
store=agent._todo_store,
|
||||
)
|
||||
tool_duration = time.time() - tool_start_time
|
||||
if agent._should_emit_quiet_tool_messages():
|
||||
agent._vprint(f" {_get_cute_tool_message_impl('todo', function_args, tool_duration, result=function_result)}")
|
||||
elif function_name == "session_search":
|
||||
session_db = agent._get_session_db_for_recall()
|
||||
if not session_db:
|
||||
from hermes_state import format_session_db_unavailable
|
||||
function_result = json.dumps({"success": False, "error": format_session_db_unavailable()})
|
||||
else:
|
||||
from tools.session_search_tool import session_search as _session_search
|
||||
function_result = _session_search(
|
||||
query=function_args.get("query", ""),
|
||||
role_filter=function_args.get("role_filter"),
|
||||
limit=function_args.get("limit", 3),
|
||||
db=session_db,
|
||||
current_session_id=agent.session_id,
|
||||
)
|
||||
tool_duration = time.time() - tool_start_time
|
||||
if agent._should_emit_quiet_tool_messages():
|
||||
agent._vprint(f" {_get_cute_tool_message_impl('session_search', function_args, tool_duration, result=function_result)}")
|
||||
elif function_name == "memory":
|
||||
target = function_args.get("target", "memory")
|
||||
from tools.memory_tool import memory_tool as _memory_tool
|
||||
function_result = _memory_tool(
|
||||
action=function_args.get("action"),
|
||||
target=target,
|
||||
content=function_args.get("content"),
|
||||
old_text=function_args.get("old_text"),
|
||||
store=agent._memory_store,
|
||||
)
|
||||
# Bridge: notify external memory provider of built-in memory writes
|
||||
if agent._memory_manager and function_args.get("action") in {"add", "replace"}:
|
||||
try:
|
||||
agent._memory_manager.on_memory_write(
|
||||
function_args.get("action", ""),
|
||||
target,
|
||||
function_args.get("content", ""),
|
||||
metadata=agent._build_memory_write_metadata(
|
||||
task_id=effective_task_id,
|
||||
tool_call_id=getattr(tool_call, "id", None),
|
||||
),
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
tool_duration = time.time() - tool_start_time
|
||||
if agent._should_emit_quiet_tool_messages():
|
||||
agent._vprint(f" {_get_cute_tool_message_impl('memory', function_args, tool_duration, result=function_result)}")
|
||||
elif function_name == "clarify":
|
||||
from tools.clarify_tool import clarify_tool as _clarify_tool
|
||||
function_result = _clarify_tool(
|
||||
question=function_args.get("question", ""),
|
||||
choices=function_args.get("choices"),
|
||||
callback=agent.clarify_callback,
|
||||
)
|
||||
tool_duration = time.time() - tool_start_time
|
||||
if agent._should_emit_quiet_tool_messages():
|
||||
agent._vprint(f" {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}")
|
||||
elif function_name == "delegate_task":
|
||||
tasks_arg = function_args.get("tasks")
|
||||
if tasks_arg and isinstance(tasks_arg, list):
|
||||
spinner_label = f"🔀 delegating {len(tasks_arg)} tasks"
|
||||
else:
|
||||
goal_preview = (function_args.get("goal") or "")[:30]
|
||||
spinner_label = f"🔀 {goal_preview}" if goal_preview else "🔀 delegating"
|
||||
spinner = None
|
||||
if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
|
||||
face = random.choice(KawaiiSpinner.get_waiting_faces())
|
||||
spinner = KawaiiSpinner(f"{face} {spinner_label}", spinner_type='dots', print_fn=agent._print_fn)
|
||||
spinner.start()
|
||||
agent._delegate_spinner = spinner
|
||||
_delegate_result = None
|
||||
try:
|
||||
function_result = agent._dispatch_delegate_task(function_args)
|
||||
_delegate_result = function_result
|
||||
finally:
|
||||
agent._delegate_spinner = None
|
||||
tool_duration = time.time() - tool_start_time
|
||||
cute_msg = _get_cute_tool_message_impl('delegate_task', function_args, tool_duration, result=_delegate_result)
|
||||
if spinner:
|
||||
spinner.stop(cute_msg)
|
||||
elif agent._should_emit_quiet_tool_messages():
|
||||
agent._vprint(f" {cute_msg}")
|
||||
elif agent._context_engine_tool_names and function_name in agent._context_engine_tool_names:
|
||||
# Context engine tools (lcm_grep, lcm_describe, lcm_expand, etc.)
|
||||
spinner = None
|
||||
if agent._should_emit_quiet_tool_messages():
|
||||
face = random.choice(KawaiiSpinner.get_waiting_faces())
|
||||
emoji = _get_tool_emoji(function_name)
|
||||
preview = _build_tool_preview(function_name, function_args) or function_name
|
||||
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
|
||||
spinner.start()
|
||||
_ce_result = None
|
||||
try:
|
||||
function_result = agent.context_compressor.handle_tool_call(function_name, function_args, messages=messages)
|
||||
_ce_result = function_result
|
||||
except Exception as tool_error:
|
||||
function_result = json.dumps({"error": f"Context engine tool '{function_name}' failed: {tool_error}"})
|
||||
logger.error("context_engine.handle_tool_call raised for %s: %s", function_name, tool_error, exc_info=True)
|
||||
finally:
|
||||
tool_duration = time.time() - tool_start_time
|
||||
cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_ce_result)
|
||||
if spinner:
|
||||
spinner.stop(cute_msg)
|
||||
elif agent._should_emit_quiet_tool_messages():
|
||||
agent._vprint(f" {cute_msg}")
|
||||
elif agent._memory_manager and agent._memory_manager.has_tool(function_name):
|
||||
# Memory provider tools (hindsight_retain, honcho_search, etc.)
|
||||
# These are not in the tool registry — route through MemoryManager.
|
||||
spinner = None
|
||||
if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
|
||||
face = random.choice(KawaiiSpinner.get_waiting_faces())
|
||||
emoji = _get_tool_emoji(function_name)
|
||||
preview = _build_tool_preview(function_name, function_args) or function_name
|
||||
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
|
||||
spinner.start()
|
||||
_mem_result = None
|
||||
try:
|
||||
function_result = agent._memory_manager.handle_tool_call(function_name, function_args)
|
||||
_mem_result = function_result
|
||||
except Exception as tool_error:
|
||||
function_result = json.dumps({"error": f"Memory tool '{function_name}' failed: {tool_error}"})
|
||||
logger.error("memory_manager.handle_tool_call raised for %s: %s", function_name, tool_error, exc_info=True)
|
||||
finally:
|
||||
tool_duration = time.time() - tool_start_time
|
||||
cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_mem_result)
|
||||
if spinner:
|
||||
spinner.stop(cute_msg)
|
||||
elif agent._should_emit_quiet_tool_messages():
|
||||
agent._vprint(f" {cute_msg}")
|
||||
elif agent.quiet_mode:
|
||||
spinner = None
|
||||
if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
|
||||
face = random.choice(KawaiiSpinner.get_waiting_faces())
|
||||
emoji = _get_tool_emoji(function_name)
|
||||
preview = _build_tool_preview(function_name, function_args) or function_name
|
||||
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
|
||||
spinner.start()
|
||||
_spinner_result = None
|
||||
try:
|
||||
function_result = _ra().handle_function_call(
|
||||
function_name, function_args, effective_task_id,
|
||||
tool_call_id=tool_call.id,
|
||||
session_id=agent.session_id or "",
|
||||
enabled_tools=list(agent.valid_tool_names) if agent.valid_tool_names else None,
|
||||
skip_pre_tool_call_hook=True,
|
||||
)
|
||||
_spinner_result = function_result
|
||||
except Exception as tool_error:
|
||||
function_result = f"Error executing tool '{function_name}': {tool_error}"
|
||||
logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True)
|
||||
finally:
|
||||
tool_duration = time.time() - tool_start_time
|
||||
cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_spinner_result)
|
||||
if spinner:
|
||||
spinner.stop(cute_msg)
|
||||
elif agent._should_emit_quiet_tool_messages():
|
||||
agent._vprint(f" {cute_msg}")
|
||||
else:
|
||||
try:
|
||||
function_result = _ra().handle_function_call(
|
||||
function_name, function_args, effective_task_id,
|
||||
tool_call_id=tool_call.id,
|
||||
session_id=agent.session_id or "",
|
||||
enabled_tools=list(agent.valid_tool_names) if agent.valid_tool_names else None,
|
||||
skip_pre_tool_call_hook=True,
|
||||
)
|
||||
except Exception as tool_error:
|
||||
function_result = f"Error executing tool '{function_name}': {tool_error}"
|
||||
logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True)
|
||||
tool_duration = time.time() - tool_start_time
|
||||
|
||||
if isinstance(function_result, str):
|
||||
result_preview = function_result if agent.verbose_logging else (
|
||||
function_result[:200] if len(function_result) > 200 else function_result
|
||||
)
|
||||
_result_len = len(function_result)
|
||||
else:
|
||||
# Multimodal dict result (_multimodal=True) — not sliceable as string
|
||||
result_preview = function_result
|
||||
_result_len = len(str(function_result))
|
||||
|
||||
# Log tool errors to the persistent error log so [error] tags
|
||||
# in the UI always have a corresponding detailed entry on disk.
|
||||
_is_error_result, _ = _detect_tool_failure(function_name, function_result)
|
||||
if not _execution_blocked:
|
||||
function_result = agent._append_guardrail_observation(
|
||||
function_name,
|
||||
function_args,
|
||||
function_result,
|
||||
failed=_is_error_result,
|
||||
)
|
||||
result_preview = function_result if agent.verbose_logging else (
|
||||
function_result[:200] if len(function_result) > 200 else function_result
|
||||
)
|
||||
if _is_error_result:
|
||||
logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview)
|
||||
else:
|
||||
logger.info("tool %s completed (%.2fs, %d chars)", function_name, tool_duration, _result_len)
|
||||
|
||||
# Track file-mutation outcome for the turn-end verifier. See
|
||||
# the concurrent path for the rationale; both paths must feed
|
||||
# the same state so the footer reflects every tool call in the
|
||||
# turn, not just the parallel ones.
|
||||
if not _execution_blocked:
|
||||
try:
|
||||
agent._record_file_mutation_result(
|
||||
function_name, function_args, function_result, _is_error_result,
|
||||
)
|
||||
except Exception as _ver_err:
|
||||
logging.debug("file-mutation verifier record failed: %s", _ver_err)
|
||||
|
||||
if not _execution_blocked and agent.tool_progress_callback:
|
||||
try:
|
||||
agent.tool_progress_callback(
|
||||
"tool.completed", function_name, None, None,
|
||||
duration=tool_duration, is_error=_is_error_result,
|
||||
)
|
||||
except Exception as cb_err:
|
||||
logging.debug(f"Tool progress callback error: {cb_err}")
|
||||
|
||||
agent._current_tool = None
|
||||
agent._touch_activity(f"tool completed: {function_name} ({tool_duration:.1f}s)")
|
||||
|
||||
if agent.verbose_logging:
|
||||
logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s")
|
||||
_log_result = _multimodal_text_summary(function_result)
|
||||
logging.debug(f"Tool result ({len(_log_result)} chars): {_log_result}")
|
||||
|
||||
if not _execution_blocked and agent.tool_complete_callback:
|
||||
try:
|
||||
agent.tool_complete_callback(tool_call.id, function_name, function_args, function_result)
|
||||
except Exception as cb_err:
|
||||
logging.debug(f"Tool complete callback error: {cb_err}")
|
||||
|
||||
function_result = maybe_persist_tool_result(
|
||||
content=function_result,
|
||||
tool_name=function_name,
|
||||
tool_use_id=tool_call.id,
|
||||
env=get_active_env(effective_task_id),
|
||||
) if not _is_multimodal_tool_result(function_result) else function_result
|
||||
|
||||
# Discover subdirectory context files from tool arguments
|
||||
subdir_hints = agent._subdirectory_hints.check_tool_call(function_name, function_args)
|
||||
if subdir_hints:
|
||||
if _is_multimodal_tool_result(function_result):
|
||||
_append_subdir_hint_to_multimodal(function_result, subdir_hints)
|
||||
else:
|
||||
function_result += subdir_hints
|
||||
|
||||
# Unwrap _multimodal dicts to an OpenAI-style content list
|
||||
# (see parallel path for rationale). String results pass through.
|
||||
_tool_content = agent._tool_result_content_for_active_model(function_name, function_result)
|
||||
tool_msg = {
|
||||
"role": "tool",
|
||||
"name": function_name,
|
||||
"content": _tool_content,
|
||||
"tool_call_id": tool_call.id
|
||||
}
|
||||
messages.append(tool_msg)
|
||||
|
||||
# ── Per-tool /steer drain ───────────────────────────────────
|
||||
# Drain pending steer BETWEEN individual tool calls so the
|
||||
# injection lands as soon as a tool finishes — not after the
|
||||
# entire batch. The model sees it on the next API iteration.
|
||||
agent._apply_pending_steer_to_tool_results(messages, 1)
|
||||
|
||||
if not agent.quiet_mode:
|
||||
if agent.verbose_logging:
|
||||
print(f" ✅ Tool {i} completed in {tool_duration:.2f}s")
|
||||
print(agent._wrap_verbose("Result: ", function_result))
|
||||
else:
|
||||
_fr_str = function_result if isinstance(function_result, str) else str(function_result)
|
||||
response_preview = _fr_str[:agent.log_prefix_chars] + "..." if len(_fr_str) > agent.log_prefix_chars else _fr_str
|
||||
print(f" ✅ Tool {i} completed in {tool_duration:.2f}s - {response_preview}")
|
||||
|
||||
if agent._interrupt_requested and i < len(assistant_message.tool_calls):
|
||||
remaining = len(assistant_message.tool_calls) - i
|
||||
agent._vprint(f"{agent.log_prefix}⚡ Interrupt: skipping {remaining} remaining tool call(s)", force=True)
|
||||
for skipped_tc in assistant_message.tool_calls[i:]:
|
||||
skipped_name = skipped_tc.function.name
|
||||
skip_msg = {
|
||||
"role": "tool",
|
||||
"name": skipped_name,
|
||||
"content": f"[Tool execution skipped — {skipped_name} was not started. User sent a new message]",
|
||||
"tool_call_id": skipped_tc.id
|
||||
}
|
||||
messages.append(skip_msg)
|
||||
break
|
||||
|
||||
if agent.tool_delay > 0 and i < len(assistant_message.tool_calls):
|
||||
time.sleep(agent.tool_delay)
|
||||
|
||||
# ── Per-turn aggregate budget enforcement ─────────────────────────
|
||||
num_tools_seq = len(assistant_message.tool_calls)
|
||||
if num_tools_seq > 0:
|
||||
enforce_turn_budget(messages[-num_tools_seq:], env=get_active_env(effective_task_id))
|
||||
|
||||
# ── /steer injection ──────────────────────────────────────────────
|
||||
# See _execute_tool_calls_parallel for the rationale. Same hook,
|
||||
# applied to sequential execution as well.
|
||||
if num_tools_seq > 0:
|
||||
agent._apply_pending_steer_to_tool_results(messages, num_tools_seq)
|
||||
|
||||
|
||||
|
||||
|
||||
__all__ = [
|
||||
"execute_tool_calls_concurrent",
|
||||
"execute_tool_calls_sequential",
|
||||
]
|
||||
12953
run_agent.py
12953
run_agent.py
File diff suppressed because it is too large
Load diff
|
|
@ -73,15 +73,20 @@ class TestAgentLoopSourceStillHasCarveOut:
|
|||
revert that happens to leave the test file intact."""
|
||||
|
||||
def test_run_agent_excludes_jsondecodeerror_from_local_validation(self):
|
||||
import run_agent
|
||||
import inspect
|
||||
src = inspect.getsource(run_agent)
|
||||
from agent import conversation_loop
|
||||
# The agent loop body lives in agent/conversation_loop.py after
|
||||
# the run_agent.py refactor. Assert the carve-out is present in
|
||||
# the extracted module specifically — if it ever moves back or
|
||||
# disappears, this fails loudly rather than silently passing
|
||||
# against a non-existent inline replica.
|
||||
src = inspect.getsource(conversation_loop)
|
||||
# The predicate we care about must reference json.JSONDecodeError
|
||||
# in its exclusion tuple. We check for the specific co-occurrence
|
||||
# rather than the literal string so harmless reformatting doesn't
|
||||
# break us.
|
||||
assert "is_local_validation_error" in src
|
||||
assert "JSONDecodeError" in src, (
|
||||
"run_agent.py must carve out json.JSONDecodeError from the "
|
||||
"is_local_validation_error classification — see #14782."
|
||||
"agent/conversation_loop.py must carve out json.JSONDecodeError "
|
||||
"from the is_local_validation_error classification — see #14782."
|
||||
)
|
||||
|
|
|
|||
|
|
@ -120,10 +120,22 @@ def test_production_code_contains_hydration_block():
|
|||
"""Smoke test: confirm the hydration code is actually wired into
|
||||
run_conversation(). If someone deletes it, tests above still pass
|
||||
against the inline replica — this fails them awake.
|
||||
|
||||
After the run_agent.py refactor the agent-loop body lives in
|
||||
``agent/conversation_loop.py`` and uses ``agent.X`` rather than
|
||||
``self.X``. Assert the block is present in the extracted module
|
||||
specifically — if it ever drifts back into run_agent.py or
|
||||
disappears entirely, this guard fails loudly.
|
||||
"""
|
||||
from pathlib import Path
|
||||
src = Path(__file__).resolve().parents[2] / "run_agent.py"
|
||||
content = src.read_text(encoding="utf-8")
|
||||
repo = Path(__file__).resolve().parents[2]
|
||||
cl_path = repo / "agent" / "conversation_loop.py"
|
||||
src_cl = cl_path.read_text(encoding="utf-8")
|
||||
# Anchor on the unique comment + the modulo line.
|
||||
assert "Hydrate per-session nudge counters from persisted history" in content
|
||||
assert "self._turns_since_memory = prior_user_turns % self._memory_nudge_interval" in content
|
||||
assert "Hydrate per-session nudge counters from persisted history" in src_cl, (
|
||||
f"Hydration comment missing from {cl_path}"
|
||||
)
|
||||
assert (
|
||||
"agent._turns_since_memory = prior_user_turns % agent._memory_nudge_interval"
|
||||
in src_cl
|
||||
), f"Hydration modulo assignment missing from {cl_path}"
|
||||
|
|
|
|||
|
|
@ -4879,23 +4879,26 @@ class TestAnthropicInterruptHandler:
|
|||
def test_interruptible_has_anthropic_branch(self):
|
||||
"""The interrupt handler must check api_mode == 'anthropic_messages'."""
|
||||
import inspect
|
||||
source = inspect.getsource(AIAgent._interruptible_api_call)
|
||||
from agent.chat_completion_helpers import interruptible_api_call
|
||||
source = inspect.getsource(interruptible_api_call)
|
||||
assert "anthropic_messages" in source, \
|
||||
"_interruptible_api_call must handle Anthropic interrupt (api_mode check)"
|
||||
"interruptible_api_call must handle Anthropic interrupt (api_mode check)"
|
||||
|
||||
def test_interruptible_rebuilds_anthropic_client(self):
|
||||
"""After interrupting, the Anthropic client should be rebuilt."""
|
||||
import inspect
|
||||
source = inspect.getsource(AIAgent._interruptible_api_call)
|
||||
from agent.chat_completion_helpers import interruptible_api_call
|
||||
source = inspect.getsource(interruptible_api_call)
|
||||
assert "build_anthropic_client" in source, \
|
||||
"_interruptible_api_call must rebuild Anthropic client after interrupt"
|
||||
"interruptible_api_call must rebuild Anthropic client after interrupt"
|
||||
|
||||
def test_streaming_has_anthropic_branch(self):
|
||||
"""_streaming_api_call must also handle Anthropic interrupt."""
|
||||
import inspect
|
||||
source = inspect.getsource(AIAgent._interruptible_streaming_api_call)
|
||||
from agent.chat_completion_helpers import interruptible_streaming_api_call
|
||||
source = inspect.getsource(interruptible_streaming_api_call)
|
||||
assert "anthropic_messages" in source, \
|
||||
"_streaming_api_call must handle Anthropic interrupt"
|
||||
"interruptible_streaming_api_call must handle Anthropic interrupt"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -5304,14 +5307,20 @@ class TestMemoryNudgeCounterPersistence:
|
|||
def test_counters_not_reset_in_preamble(self):
|
||||
"""The run_conversation preamble must not zero the nudge counters."""
|
||||
import inspect
|
||||
src = inspect.getsource(AIAgent.run_conversation)
|
||||
from agent.conversation_loop import run_conversation as _rc
|
||||
src = inspect.getsource(_rc)
|
||||
# The preamble resets many fields (retry counts, budget, etc.)
|
||||
# before the main loop. Find that reset block and verify our
|
||||
# counters aren't in it. The reset block ends at iteration_budget.
|
||||
preamble_end = src.index("self.iteration_budget = IterationBudget")
|
||||
# The extracted body uses ``agent.X`` (not ``self.X``). Anchor
|
||||
# exactly on ``agent.iteration_budget = IterationBudget`` so an
|
||||
# unrelated identifier ending in ``iteration_budget`` (e.g.
|
||||
# ``_iteration_budget`` or ``shared_iteration_budget``) can't
|
||||
# match the boundary.
|
||||
preamble_end = src.index("agent.iteration_budget = IterationBudget")
|
||||
preamble = src[:preamble_end]
|
||||
assert "self._turns_since_memory = 0" not in preamble
|
||||
assert "self._iters_since_skill = 0" not in preamble
|
||||
assert "agent._turns_since_memory = 0" not in preamble
|
||||
assert "agent._iters_since_skill = 0" not in preamble
|
||||
|
||||
|
||||
class TestDeadRetryCode:
|
||||
|
|
@ -5319,7 +5328,8 @@ class TestDeadRetryCode:
|
|||
|
||||
def test_no_unreachable_max_retries_after_backoff(self):
|
||||
import inspect
|
||||
source = inspect.getsource(AIAgent.run_conversation)
|
||||
from agent.conversation_loop import run_conversation as _rc
|
||||
source = inspect.getsource(_rc)
|
||||
occurrences = source.count("if retry_count >= max_retries:")
|
||||
assert occurrences == 2, (
|
||||
f"Expected 2 occurrences of 'if retry_count >= max_retries:' "
|
||||
|
|
@ -5357,7 +5367,8 @@ class TestMemoryContextSanitization:
|
|||
a literal <memory-context> tag we don't silently delete their text.
|
||||
The streaming scrubber + plugin-side scrub cover real leak paths."""
|
||||
import inspect
|
||||
src = inspect.getsource(AIAgent.run_conversation)
|
||||
from agent.conversation_loop import run_conversation as _rc
|
||||
src = inspect.getsource(_rc)
|
||||
assert "sanitize_context(user_message)" not in src
|
||||
assert "sanitize_context(persist_user_message)" not in src
|
||||
|
||||
|
|
@ -5393,7 +5404,8 @@ class TestMemoryProviderTurnStart:
|
|||
def test_on_turn_start_called_before_prefetch(self):
|
||||
"""Source-level check: on_turn_start appears before prefetch_all in run_conversation."""
|
||||
import inspect
|
||||
src = inspect.getsource(AIAgent.run_conversation)
|
||||
from agent.conversation_loop import run_conversation as _rc
|
||||
src = inspect.getsource(_rc)
|
||||
# Find the actual method calls, not comments
|
||||
idx_turn_start = src.index(".on_turn_start(")
|
||||
idx_prefetch = src.index(".prefetch_all(")
|
||||
|
|
@ -5403,7 +5415,10 @@ class TestMemoryProviderTurnStart:
|
|||
)
|
||||
|
||||
def test_on_turn_start_uses_user_turn_count(self):
|
||||
"""Source-level check: on_turn_start receives self._user_turn_count."""
|
||||
"""Source-level check: on_turn_start receives the user_turn_count."""
|
||||
import inspect
|
||||
src = inspect.getsource(AIAgent.run_conversation)
|
||||
assert "on_turn_start(self._user_turn_count" in src
|
||||
from agent.conversation_loop import run_conversation as _rc
|
||||
src = inspect.getsource(_rc)
|
||||
# The extracted body uses ``agent.X`` rather than ``self.X``;
|
||||
# assert the extracted-form spelling directly.
|
||||
assert "on_turn_start(agent._user_turn_count" in src
|
||||
|
|
|
|||
|
|
@ -152,19 +152,28 @@ def test_run_agent_concurrent_executor_wraps_submit_with_copy_context():
|
|||
import inspect
|
||||
|
||||
import run_agent
|
||||
from agent import tool_executor as tool_executor_module
|
||||
|
||||
src_path = inspect.getsourcefile(run_agent)
|
||||
assert src_path is not None
|
||||
tree = ast.parse(open(src_path, encoding="utf-8").read())
|
||||
# Source for both modules — the concurrent-executor body lives in
|
||||
# ``agent/tool_executor.py`` after the run_agent.py refactor (PR
|
||||
# following #16660). Search both so this guard keeps firing
|
||||
# regardless of where the call site lives.
|
||||
sources = []
|
||||
for mod in (run_agent, tool_executor_module):
|
||||
src_path = inspect.getsourcefile(mod)
|
||||
assert src_path is not None
|
||||
sources.append((src_path, open(src_path, encoding="utf-8").read()))
|
||||
|
||||
submit_calls_in_agent: list[ast.Call] = []
|
||||
for node in ast.walk(tree):
|
||||
if not isinstance(node, ast.Call):
|
||||
continue
|
||||
func = node.func
|
||||
# Match executor.submit(...) style calls.
|
||||
if isinstance(func, ast.Attribute) and func.attr == "submit":
|
||||
submit_calls_in_agent.append(node)
|
||||
for _src_path, src_text in sources:
|
||||
tree = ast.parse(src_text)
|
||||
for node in ast.walk(tree):
|
||||
if not isinstance(node, ast.Call):
|
||||
continue
|
||||
func = node.func
|
||||
# Match executor.submit(...) style calls.
|
||||
if isinstance(func, ast.Attribute) and func.attr == "submit":
|
||||
submit_calls_in_agent.append(node)
|
||||
|
||||
# Filter to the submit call inside the concurrent tool executor —
|
||||
# identifiable by passing `_run_tool` as its target. Other submit()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue