mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
* chore: remove Atropos RL environments, tools, tests, skill, and tinker-atropos submodule Delete: - environments/ (43 files — base env, agent loop, tool call parsers, benchmarks) - rl_cli.py (standalone RL training CLI) - tools/rl_training_tool.py (all 10 rl_* tools) - tests: test_rl_training_tool, test_tool_call_parsers, test_managed_server_tool_support, test_agent_loop, test_agent_loop_vllm, test_agent_loop_tool_calling, test_terminalbench2_env_security - optional-skills/mlops/hermes-atropos-environments/ - tinker-atropos git submodule + .gitmodules * chore: remove RL/Atropos references from Python source - toolsets.py: remove rl toolset block + update comment - model_tools.py: remove rl_tools group + update async bridging comment - hermes_cli/tools_config.py: remove RL display entry, _DEFAULT_OFF_TOOLSETS, setup block, and rl_training post-setup handler - tools/budget_config.py: remove RL environment reference in docstring - tests/test_model_tools.py: remove rl_tools from expected groups - tests/run_agent/test_streaming_tool_call_repair.py: fix stale cross-reference * chore: remove rl/yc-bench extras and tinker-atropos refs from pyproject.toml - Remove rl extra (atroposlib, tinker, fastapi, uvicorn, wandb) - Remove yc-bench extra - Remove rl_cli from py-modules - Remove [tool.ty.src] exclude for tinker-atropos - Remove [tool.ruff] exclude for tinker-atropos - Regenerate uv.lock * chore: remove tinker-atropos from install/setup scripts - setup-hermes.sh: remove entire tinker-atropos submodule install block - scripts/install.sh: remove both tinker-atropos blocks (Termux + standard) - scripts/install.ps1: remove tinker-atropos block - nix/hermes-agent.nix: remove tinker-atropos pip install line * chore: remove RL references from cli-config.yaml.example * docs: remove Atropos/RL references from README, CONTRIBUTING, AGENTS.md * docs: remove RL/Atropos references from website - Delete: environments.md, rl-training.md, mlops-hermes-atropos-environments.md - 
sidebars.ts: remove rl-training and environments sidebar entries - optional-skills-catalog.md: remove hermes-atropos-environments row - tools-reference.md: remove entire rl toolset section - toolsets-reference.md: remove rl row + update example - integrations/index.md: remove RL Training bullet - architecture.md: remove environments/ from tree + RL section - contributing.md: remove tinker-atropos setup - updating.md: remove tinker-atropos install + stale submodule update * chore: remove remaining RL/Atropos stragglers - hermes_cli/config.py: remove TINKER_API_KEY + WANDB_API_KEY env var defs - hermes_cli/doctor.py: remove Submodules check section (tinker-atropos) - hermes_cli/setup.py: remove RL Training status check - hermes_cli/status.py: remove Tinker + WandB from API key status display - agent/display.py: remove both rl_* tool preview/activity blocks - website/docs: remove RL references from providers.md + env-variables.md - tests: remove TINKER_API_KEY from conftest, set_config_value, setup_script * chore: remove RL training section from .env.example
116 lines
No EOL
4.6 KiB
Python
116 lines
No EOL
4.6 KiB
Python
"""Tests for tool call argument repair in the streaming assembly path.

The streaming path (run_agent._call_chat_completions) assembles tool call
deltas into full arguments. When a model truncates or malforms the JSON
(e.g. GLM-5.1 via Ollama), the assembly path used to pass the broken JSON
straight through — setting has_truncated_tool_args but NOT repairing it.
That triggered the truncation handler to kill the session with /new required.

The fix: repair arguments in the streaming assembly path using
_repair_tool_call_arguments() so repairable malformations (trailing commas,
unclosed brackets, Python None) don't kill the session.
"""
|
|
|
|
import json
|
|
import pytest
|
|
|
|
from run_agent import _repair_tool_call_arguments
|
|
|
|
|
|
class TestStreamingAssemblyRepair:
    """Exercise _repair_tool_call_arguments on streaming-style malformations.

    The streaming assembly path applies _repair_tool_call_arguments to tool
    call arguments before they are assembled into mock_tool_calls. The tests
    here cover only the REPAIR FUNCTION, fed the kinds of broken payloads
    that arise during streaming assembly. Integration tests that exercise
    the full streaming path are in run_agent.py's streaming tests.
    """

    # -- Truncation cases (most common streaming failure) --

    def test_truncated_object_no_close_brace(self):
        """Output stops before the closing brace (e.g. output length limit)."""
        payload = '{"command": "ls -la", "timeout": 30'
        decoded = json.loads(_repair_tool_call_arguments(payload, "terminal"))
        assert decoded["command"] == "ls -la"
        assert decoded["timeout"] == 30

    def test_truncated_nested_object(self):
        """A write_file payload missing its closing brace keeps its path.

        NOTE(review): despite the test name, the payload used here is a flat
        object rather than a nested one, and only "path" is checked.
        """
        payload = '{"path": "/tmp/foo", "content": "hello"'
        decoded = json.loads(_repair_tool_call_arguments(payload, "write_file"))
        assert decoded["path"] == "/tmp/foo"

    def test_truncated_mid_value(self):
        """Output is cut off in the middle of a string value."""
        payload = '{"command": "git clone ht'
        repaired = _repair_tool_call_arguments(payload, "terminal")
        # Only validity matters here (the command value may be lost);
        # json.loads raises on invalid JSON, which fails the test.
        json.loads(repaired)

    # -- Trailing comma cases (Ollama/GLM common) --

    def test_trailing_comma_before_close_brace(self):
        """A comma right before the closing brace is dropped."""
        payload = '{"path": "/tmp", "content": "x",}'
        expected = {"path": "/tmp", "content": "x"}
        repaired = _repair_tool_call_arguments(payload, "write_file")
        assert json.loads(repaired) == expected

    def test_trailing_comma_in_list(self):
        """A comma right before a closing bracket is dropped."""
        payload = '{"items": [1, 2, 3,]}'
        expected = {"items": [1, 2, 3]}
        repaired = _repair_tool_call_arguments(payload, "test")
        assert json.loads(repaired) == expected

    # -- Python None from model output --

    def test_python_none_literal(self):
        """The bare Python literal None becomes an empty JSON object."""
        repaired = _repair_tool_call_arguments("None", "test")
        assert repaired == "{}"

    # -- Empty arguments (some models emit empty string) --

    def test_empty_string(self):
        """An empty argument string becomes an empty JSON object."""
        repaired = _repair_tool_call_arguments("", "test")
        assert repaired == "{}"

    def test_whitespace_only(self):
        """A whitespace-only argument string becomes an empty JSON object."""
        repaired = _repair_tool_call_arguments(" \n ", "test")
        assert repaired == "{}"

    # -- Already-valid JSON passes through unchanged --

    def test_valid_json_passthrough(self):
        """Well-formed JSON decodes to the same data after repair."""
        payload = '{"path": "/tmp/foo", "content": "hello"}'
        expected = {"path": "/tmp/foo", "content": "hello"}
        repaired = _repair_tool_call_arguments(payload, "write_file")
        assert json.loads(repaired) == expected

    # -- Extra closing brackets (rare but happens) --

    def test_extra_closing_brace(self):
        """A surplus closing brace is removed."""
        payload = '{"key": "value"}}'
        expected = {"key": "value"}
        repaired = _repair_tool_call_arguments(payload, "test")
        assert json.loads(repaired) == expected

    # -- Real-world GLM-5.1 truncation pattern --

    def test_glm_truncation_pattern(self):
        """Common GLM-5.1 (via Ollama) truncation that ends on a bare colon.

        The payload stops right after a key and its colon ("background":),
        which is treated as unrepairable, so the last-resort empty object {}
        is the safest outcome. What matters is that the repairable patterns
        (trailing comma, unclosed brace WITHOUT a hanging colon) DO get fixed.
        """
        payload = '{"command": "ls -la /tmp", "timeout": 30, "background":'
        repaired = _repair_tool_call_arguments(payload, "terminal")
        # Hanging colon can't be fixed, so the fallback empty object is expected.
        decoded = json.loads(repaired)
        assert decoded == {}

    def test_glm_truncation_repairable(self):
        """GLM-5.1 truncation that only lacks the closing brace IS repaired."""
        payload = '{"command": "ls -la /tmp", "timeout": 30'
        decoded = json.loads(_repair_tool_call_arguments(payload, "terminal"))
        assert decoded["command"] == "ls -la /tmp"
        assert decoded["timeout"] == 30