mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-03 02:11:48 +00:00
agent_loop.py: add _truncate_context() with a two-phase strategy (first truncate tool results, then drop the oldest middle messages while keeping assistant+tool pairs); add a max_context_tokens parameter; guard against double-encoded JSON tool arguments (the model outputs a string instead of a dict). hermes_base_env.py: wire max_context_tokens=max_token_length through all three HermesAgentLoop construction sites. hermes_parser.py: prevent double-encoding — when arguments are already a string, use them as-is instead of calling json.dumps(), which would double-encode them. swe_smith_oracle_env.py: shaped reward structure for cold-start training: 0.0 (no tools) -> 0.05 per call up to 0.3 -> 0.4 (install ok) -> 1.0 (tests pass); the _build_scored_item() override truncates tokens/masks from the END to fit max_token_len instead of discarding entire groups. All changes are in environments/ only — no effect on the TUI/CLI agent loop.
79 lines
2.6 KiB
Python
79 lines
2.6 KiB
Python
"""
|
|
Hermes tool call parser.
|
|
|
|
Format: <tool_call>{"name": "func", "arguments": {...}}</tool_call>
|
|
Based on VLLM's Hermes2ProToolParser.extract_tool_calls()
|
|
"""
|
|
|
|
import json
|
|
import re
|
|
import uuid
|
|
from typing import List, Optional, Tuple
|
|
|
|
from openai.types.chat.chat_completion_message_tool_call import (
|
|
ChatCompletionMessageToolCall,
|
|
Function,
|
|
)
|
|
|
|
from environments.tool_call_parsers import ParseResult, ToolCallParser, register_parser
|
|
|
|
|
|
@register_parser("hermes")
class HermesToolCallParser(ToolCallParser):
    """
    Tool-call parser for the Hermes ``<tool_call>`` tag format.

    Each call is a JSON object carrying a "name" and "arguments", wrapped in
    <tool_call>...</tool_call>. A trailing <tool_call> that is never closed
    (e.g. a truncated generation) is matched as well.
    """

    # First alternative captures a properly closed tag; the second captures
    # everything after an opening tag that has no closing counterpart.
    PATTERN = re.compile(
        r"<tool_call>\s*(.*?)\s*</tool_call>|<tool_call>\s*(.*)", re.DOTALL
    )

    def parse(self, text: str) -> ParseResult:
        """
        Split *text* into leading content and a list of parsed tool calls.

        Returns ``(text, None)`` unchanged when the text has no tool-call tag,
        when no non-empty call body is found, or when anything goes wrong
        while decoding a call. Otherwise returns ``(content, tool_calls)``,
        where ``content`` is the stripped text preceding the first
        ``<tool_call>`` tag, or ``None`` if that prefix is empty.
        """
        if "<tool_call>" not in text:
            return text, None

        try:
            tool_calls: List[ChatCompletionMessageToolCall] = []

            # findall yields one (closed_body, unclosed_body) pair per tag;
            # only one of the two groups is populated for a given match.
            for closed_body, unclosed_body in self.PATTERN.findall(text):
                payload = closed_body or unclosed_body
                if not payload.strip():
                    continue

                call_spec = json.loads(payload)

                # Arguments may arrive as a dict or as an already-serialized
                # JSON string; strings pass through untouched so they are
                # not encoded a second time.
                arguments = call_spec.get("arguments", {})
                if not isinstance(arguments, str):
                    arguments = json.dumps(arguments, ensure_ascii=False)

                call_id = f"call_{uuid.uuid4().hex[:8]}"
                function = Function(name=call_spec["name"], arguments=arguments)
                tool_calls.append(
                    ChatCompletionMessageToolCall(
                        id=call_id,
                        type="function",
                        function=function,
                    )
                )

            if not tool_calls:
                return text, None

            # Only the text ahead of the first tag is kept as content.
            leading = text[: text.find("<tool_call>")].strip()
            return (leading or None), tool_calls

        except Exception:
            # Best-effort parser: any failure hands the raw text back as-is.
            return text, None
|