mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-25 05:52:34 +00:00
chore: remove Atropos RL environments and tinker-atropos integration (#26106)
* chore: remove Atropos RL environments, tools, tests, skill, and tinker-atropos submodule Delete: - environments/ (43 files — base env, agent loop, tool call parsers, benchmarks) - rl_cli.py (standalone RL training CLI) - tools/rl_training_tool.py (all 10 rl_* tools) - tests: test_rl_training_tool, test_tool_call_parsers, test_managed_server_tool_support, test_agent_loop, test_agent_loop_vllm, test_agent_loop_tool_calling, test_terminalbench2_env_security - optional-skills/mlops/hermes-atropos-environments/ - tinker-atropos git submodule + .gitmodules * chore: remove RL/Atropos references from Python source - toolsets.py: remove rl toolset block + update comment - model_tools.py: remove rl_tools group + update async bridging comment - hermes_cli/tools_config.py: remove RL display entry, _DEFAULT_OFF_TOOLSETS, setup block, and rl_training post-setup handler - tools/budget_config.py: remove RL environment reference in docstring - tests/test_model_tools.py: remove rl_tools from expected groups - tests/run_agent/test_streaming_tool_call_repair.py: fix stale cross-reference * chore: remove rl/yc-bench extras and tinker-atropos refs from pyproject.toml - Remove rl extra (atroposlib, tinker, fastapi, uvicorn, wandb) - Remove yc-bench extra - Remove rl_cli from py-modules - Remove [tool.ty.src] exclude for tinker-atropos - Remove [tool.ruff] exclude for tinker-atropos - Regenerate uv.lock * chore: remove tinker-atropos from install/setup scripts - setup-hermes.sh: remove entire tinker-atropos submodule install block - scripts/install.sh: remove both tinker-atropos blocks (Termux + standard) - scripts/install.ps1: remove tinker-atropos block - nix/hermes-agent.nix: remove tinker-atropos pip install line * chore: remove RL references from cli-config.yaml.example * docs: remove Atropos/RL references from README, CONTRIBUTING, AGENTS.md * docs: remove RL/Atropos references from website - Delete: environments.md, rl-training.md, mlops-hermes-atropos-environments.md - 
sidebars.ts: remove rl-training and environments sidebar entries - optional-skills-catalog.md: remove hermes-atropos-environments row - tools-reference.md: remove entire rl toolset section - toolsets-reference.md: remove rl row + update example - integrations/index.md: remove RL Training bullet - architecture.md: remove environments/ from tree + RL section - contributing.md: remove tinker-atropos setup - updating.md: remove tinker-atropos install + stale submodule update * chore: remove remaining RL/Atropos stragglers - hermes_cli/config.py: remove TINKER_API_KEY + WANDB_API_KEY env var defs - hermes_cli/doctor.py: remove Submodules check section (tinker-atropos) - hermes_cli/setup.py: remove RL Training status check - hermes_cli/status.py: remove Tinker + WandB from API key status display - agent/display.py: remove both rl_* tool preview/activity blocks - website/docs: remove RL references from providers.md + env-variables.md - tests: remove TINKER_API_KEY from conftest, set_config_value, setup_script * chore: remove RL training section from .env.example
This commit is contained in:
parent
d364132114
commit
5af672c753
97 changed files with 18 additions and 15690 deletions
|
|
@ -1,178 +0,0 @@
|
|||
"""
|
||||
Tests for ManagedServer / tool-parser integration.
|
||||
|
||||
Validates that:
|
||||
1. The installed atroposlib API still matches Hermes's expectations
|
||||
2. Hermes's parser registry remains compatible with ManagedServer parsing
|
||||
3. HermesAgentBaseEnv wires the selected parser into ServerManager correctly
|
||||
|
||||
These tests verify the contract between hermes-agent's environments/ code
|
||||
and atroposlib's ManagedServer. They detect API incompatibilities early.
|
||||
"""
|
||||
|
||||
import inspect
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
||||
|
||||
try:
|
||||
import atroposlib # noqa: F401
|
||||
except ImportError:
|
||||
pytest.skip("atroposlib not installed", allow_module_level=True)
|
||||
|
||||
|
||||
class TestManagedServerAPI:
    """Pin the parts of atroposlib's ManagedServer surface that hermes-agent relies on."""

    def test_managed_server_init_signature(self):
        """ManagedServer.__init__ exposes the core params; tool_call_parser is optional."""
        from atroposlib.envs.server_handling.managed_server import ManagedServer

        accepted = inspect.signature(ManagedServer.__init__).parameters

        # Parameters that must be present on every atroposlib flavour.
        for required in ("self", "server", "tokenizer", "track_tree"):
            assert required in accepted

        # tool_call_parser only exists on the tool_call_support branch; its
        # absence marks a baseline atroposlib, which is reported as a skip.
        if "tool_call_parser" not in accepted:
            pytest.skip(
                "ManagedServer does not have tool_call_parser param — "
                "baseline atroposlib (pre tool_call_support branch)"
            )

    def test_server_manager_managed_server_signature(self):
        """ServerManager.managed_server() takes tokenizer; tool_call_parser is optional."""
        from atroposlib.envs.server_handling.server_manager import ServerManager

        accepted = inspect.signature(ServerManager.managed_server).parameters

        for required in ("self", "tokenizer"):
            assert required in accepted

        # Same policy as above: a missing tool_call_parser is a skip, not a failure.
        if "tool_call_parser" not in accepted:
            pytest.skip(
                "ServerManager.managed_server() does not have tool_call_parser param — "
                "baseline atroposlib (pre tool_call_support branch)"
            )

    def test_managed_server_chat_template_kwargs(self):
        """When ManagedServer defines CHAT_TEMPLATE_KWARGS it must include "tools"."""
        from atroposlib.envs.server_handling.managed_server import ManagedServer

        absent = object()
        template_kwargs = getattr(ManagedServer, "CHAT_TEMPLATE_KWARGS", absent)
        if template_kwargs is absent:
            pytest.skip(
                "ManagedServer does not have CHAT_TEMPLATE_KWARGS — "
                "baseline atroposlib (pre tool_call_support branch)"
            )

        assert "tools" in template_kwargs, "tools must be in CHAT_TEMPLATE_KWARGS"

    def test_no_get_logprobs_method(self):
        """Record whether get_logprobs is gone (expected on the tool_call_support branch)."""
        from atroposlib.envs.server_handling.managed_server import ManagedServer

        # Baseline atroposlib still ships get_logprobs; the tool_call_support
        # branch drops it. Neither state is a hard failure: the baseline is
        # surfaced as a skip, the updated branch simply passes.
        if hasattr(ManagedServer, "get_logprobs"):
            pytest.skip(
                "ManagedServer still has get_logprobs — baseline atroposlib"
            )
|
||||
|
||||
|
||||
class TestParserCompatibility:
    """Check the hermes parser's output shape against what ManagedServer consumes."""

    def test_parser_parse_returns_correct_format(self):
        """
        parser.parse(text) must yield (content, tool_calls), where every tool call
        exposes .id, .function.name and .function.arguments.
        """
        from environments.tool_call_parsers import get_parser

        raw = '<tool_call>{"name": "terminal", "arguments": {"command": "ls"}}</tool_call>'
        _content, calls = get_parser("hermes").parse(raw)

        assert calls is not None
        assert len(calls) == 1

        first = calls[0]
        # ManagedServer reads these attributes directly, so they must exist.
        for attr in ("id", "function"):
            assert hasattr(first, attr)
        for attr in ("name", "arguments"):
            assert hasattr(first.function, attr)

    def test_parser_no_tools_returns_none(self):
        """Plain text must give tool_calls=None so `if parsed_tool_calls:` is falsy."""
        from environments.tool_call_parsers import get_parser

        _content, calls = get_parser("hermes").parse("Just text, no tools")
        assert calls is None

    def test_parser_content_is_string_or_none(self):
        """Content must be str or None because ManagedServer applies `parsed_content or ""`."""
        from environments.tool_call_parsers import get_parser

        hermes = get_parser("hermes")

        # Input containing a tool call: content may be None or a string.
        with_call = '<tool_call>{"name": "terminal", "arguments": {"command": "ls"}}</tool_call>'
        parsed_content, _ = hermes.parse(with_call)
        assert isinstance(parsed_content, (str, type(None)))

        # Input without a tool call: content has to be a string.
        plain_content, _ = hermes.parse("Just text")
        assert isinstance(plain_content, str)
|
||||
|
||||
|
||||
class TestBaseEnvCompatibility:
    """Test that hermes_base_env.py's tool-parser wiring matches the current API.

    Both tests inspect the source text of ``environments/hermes_base_env.py``
    (found three ``.parent`` hops up from this file) instead of importing it.
    """

    @staticmethod
    def _read_base_env_source():
        """Return the text of environments/hermes_base_env.py decoded as UTF-8."""
        base_env_path = Path(__file__).parent.parent.parent / "environments" / "hermes_base_env.py"
        # Be explicit about the encoding: Path.read_text() otherwise falls back
        # to the locale's preferred encoding, which can mis-decode (or fail on)
        # non-ASCII characters in Python source on non-UTF-8 platforms.
        return base_env_path.read_text(encoding="utf-8")

    @staticmethod
    def _assigns_self_server_tool_parser(tree):
        """Return True if *tree* contains an assignment to ``self.server.tool_parser``."""
        import ast

        for node in ast.walk(tree):
            if not isinstance(node, ast.Assign):
                continue
            for target in node.targets:
                if not (isinstance(target, ast.Attribute) and target.attr == "tool_parser"):
                    continue
                parent = target.value
                if (
                    isinstance(parent, ast.Attribute)
                    and parent.attr == "server"
                    and isinstance(parent.value, ast.Name)
                    and parent.value.id == "self"
                ):
                    # One match is enough; stop walking the rest of the tree.
                    return True
        return False

    def test_hermes_base_env_sets_server_manager_tool_parser(self):
        """Hermes wires parser selection through ServerManager.tool_parser."""
        import ast

        tree = ast.parse(self._read_base_env_source())

        assert self._assigns_self_server_tool_parser(tree), (
            "hermes_base_env.py should set self.server.tool_parser from config.tool_call_parser"
        )

    def test_hermes_base_env_uses_config_tool_call_parser(self):
        """Verify hermes_base_env uses the config field rather than a local parser instance."""
        source = self._read_base_env_source()

        # Plain substring checks: the config field declaration and the exact
        # assignment that forwards it to the server manager must both be present.
        assert 'tool_call_parser: str = Field(' in source
        assert 'self.server.tool_parser = config.tool_call_parser' in source
|
||||
|
|
@ -1,142 +0,0 @@
|
|||
"""Tests for rl_training_tool.py — file handle lifecycle and cleanup.
|
||||
|
||||
Verifies that _stop_training_run properly closes log file handles,
|
||||
terminates processes, and handles edge cases on failure paths.
|
||||
Inspired by PR #715 (0xbyt4).
|
||||
"""
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from tools.rl_training_tool import RunState, _stop_training_run
|
||||
|
||||
|
||||
def _make_run_state(**overrides) -> RunState:
    """Build a RunState from minimal test defaults; keyword overrides take precedence."""
    fields = {
        "run_id": "test-run-001",
        "environment": "test_env",
        "config": {},
        **overrides,
    }
    return RunState(**fields)
|
||||
|
||||
|
||||
class TestStopTrainingRunFileHandles:
    """Check that _stop_training_run closes the log file handles held on the run state."""

    def test_closes_all_log_file_handles(self):
        run = _make_run_state()
        handles = {
            name: MagicMock()
            for name in ("api_log_file", "trainer_log_file", "env_log_file")
        }
        for name, handle in handles.items():
            setattr(run, name, handle)

        _stop_training_run(run)

        # Each handle is closed exactly once and its attribute reset to None.
        for name, handle in handles.items():
            handle.close.assert_called_once()
            assert getattr(run, name) is None

    def test_clears_file_attrs_to_none(self):
        run = _make_run_state()
        run.api_log_file = MagicMock()

        _stop_training_run(run)

        assert run.api_log_file is None

    def test_close_exception_does_not_propagate(self):
        """A handle whose .close() raises must not abort the cleanup."""
        run = _make_run_state()

        failing = MagicMock()
        failing.close.side_effect = OSError("already closed")
        healthy = MagicMock()
        run.api_log_file = failing
        run.trainer_log_file = healthy

        _stop_training_run(run)  # must not raise

        # Both handles were still attempted, despite the first one failing.
        failing.close.assert_called_once()
        healthy.close.assert_called_once()

    def test_handles_missing_file_attrs(self):
        """A RunState on which no log file attrs were set must not crash."""
        run = _make_run_state()
        # No log file attributes assigned here at all.
        _stop_training_run(run)  # must not raise
|
||||
|
||||
|
||||
class TestStopTrainingRunProcesses:
    """Check how _stop_training_run treats the process handles on the run state."""

    def test_terminates_running_processes(self):
        process_attrs = ("api_process", "trainer_process", "env_process")
        run = _make_run_state()
        for name in process_attrs:
            alive = MagicMock()
            alive.poll.return_value = None  # poll() -> None: still running
            setattr(run, name, alive)

        _stop_training_run(run)

        # Read the mocks back from the state, exactly where they were stored.
        for name in process_attrs:
            getattr(run, name).terminate.assert_called_once()

    def test_does_not_terminate_exited_processes(self):
        run = _make_run_state()
        finished = MagicMock()
        finished.poll.return_value = 0  # poll() -> exit code: already exited
        run.api_process = finished

        _stop_training_run(run)

        finished.terminate.assert_not_called()

    def test_handles_none_processes(self):
        run = _make_run_state()
        # Process attributes are left at their defaults.
        _stop_training_run(run)  # must not raise

    def test_handles_mixed_running_and_exited_processes(self):
        run = _make_run_state()

        # API process: still running.
        api_proc = MagicMock()
        api_proc.poll.return_value = None
        run.api_process = api_proc

        # Trainer process: already exited.
        trainer_proc = MagicMock()
        trainer_proc.poll.return_value = 0
        run.trainer_process = trainer_proc

        # Environment process: absent.
        run.env_process = None

        _stop_training_run(run)

        api_proc.terminate.assert_called_once()
        trainer_proc.terminate.assert_not_called()
|
||||
|
||||
|
||||
class TestStopTrainingRunStatus:
    """Check which status values _stop_training_run rewrites and which it leaves alone."""

    def test_sets_status_to_stopped_when_running(self):
        run = _make_run_state(status="running")

        _stop_training_run(run)

        assert run.status == "stopped"

    def test_does_not_change_status_when_failed(self):
        run = _make_run_state(status="failed")

        _stop_training_run(run)

        assert run.status == "failed"

    def test_does_not_change_status_when_pending(self):
        run = _make_run_state(status="pending")

        _stop_training_run(run)

        assert run.status == "pending"

    def test_no_crash_with_no_processes_and_no_files(self):
        run = _make_run_state()

        _stop_training_run(run)  # must not raise

        # With no explicit status passed in, the state is expected to read "pending".
        assert run.status == "pending"
|
||||
|
|
@ -1,274 +0,0 @@
|
|||
"""
|
||||
Tests for environments/tool_call_parsers/ — client-side tool call parsers.
|
||||
|
||||
These parsers extract structured tool_calls from raw model output text.
|
||||
Used in Phase 2 (VLLM/generate) where the server returns raw tokens.
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
# Ensure repo root is importable
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
||||
|
||||
try:
|
||||
from environments.tool_call_parsers import (
|
||||
ParseResult,
|
||||
ToolCallParser,
|
||||
get_parser,
|
||||
list_parsers,
|
||||
)
|
||||
except ImportError:
|
||||
pytest.skip("atroposlib not installed", allow_module_level=True)
|
||||
|
||||
|
||||
# ─── Registry tests ─────────────────────────────────────────────────────
|
||||
|
||||
class TestParserRegistry:
    """Registry-level checks for list_parsers() / get_parser()."""

    def test_list_parsers_returns_nonempty(self):
        assert len(list_parsers()) > 0

    def test_hermes_parser_registered(self):
        assert "hermes" in list_parsers()

    def test_get_parser_returns_instance(self):
        assert isinstance(get_parser("hermes"), ToolCallParser)

    def test_get_parser_unknown_raises(self):
        with pytest.raises(KeyError):
            get_parser("nonexistent_parser_xyz")

    def test_all_registered_parsers_instantiate(self):
        """Each registered name resolves to a ToolCallParser that has a parse attribute."""
        for registered_name in list_parsers():
            instance = get_parser(registered_name)
            assert isinstance(instance, ToolCallParser)
            assert hasattr(instance, "parse")
|
||||
|
||||
|
||||
# ─── Hermes parser tests ────────────────────────────────────────────────
|
||||
|
||||
class TestHermesParser:
    """Behaviour of the "hermes" parser on <tool_call>...</tool_call> formatted output."""

    @pytest.fixture
    def parser(self):
        return get_parser("hermes")

    def test_no_tool_call(self, parser):
        plain = "Hello, I can help you with that."
        content, tool_calls = parser.parse(plain)
        # Text without tool calls comes back unchanged, with no tool calls.
        assert content == plain
        assert tool_calls is None

    def test_single_tool_call(self, parser):
        _, tool_calls = parser.parse(
            '<tool_call>{"name": "terminal", "arguments": {"command": "ls -la"}}</tool_call>'
        )
        assert tool_calls is not None
        assert len(tool_calls) == 1

        call = tool_calls[0]
        assert call.function.name == "terminal"
        # Arguments are carried as a JSON string, so decode before comparing.
        assert json.loads(call.function.arguments)["command"] == "ls -la"

    def test_tool_call_with_surrounding_text(self, parser):
        raw = 'Let me check that for you.\n<tool_call>{"name": "terminal", "arguments": {"command": "pwd"}}</tool_call>'
        content, tool_calls = parser.parse(raw)
        assert tool_calls is not None
        assert len(tool_calls) == 1
        assert tool_calls[0].function.name == "terminal"
        # When content is returned, it should carry the surrounding text.
        assert content is None or "check that" in content or content.strip() != ""

    def test_multiple_tool_calls(self, parser):
        raw = (
            '<tool_call>{"name": "terminal", "arguments": {"command": "ls"}}</tool_call>\n'
            '<tool_call>{"name": "read_file", "arguments": {"path": "test.py"}}</tool_call>'
        )
        _, tool_calls = parser.parse(raw)
        assert tool_calls is not None
        assert len(tool_calls) == 2

        seen = {call.function.name for call in tool_calls}
        for expected in ("terminal", "read_file"):
            assert expected in seen

    def test_tool_call_ids_are_unique(self, parser):
        raw = (
            '<tool_call>{"name": "terminal", "arguments": {"command": "ls"}}</tool_call>\n'
            '<tool_call>{"name": "terminal", "arguments": {"command": "pwd"}}</tool_call>'
        )
        _, tool_calls = parser.parse(raw)
        assert tool_calls is not None

        call_ids = [call.id for call in tool_calls]
        assert len(set(call_ids)) == len(call_ids), "Tool call IDs must be unique"

    def test_empty_string(self, parser):
        _, tool_calls = parser.parse("")
        assert tool_calls is None

    def test_malformed_json_in_tool_call(self, parser):
        # No value assertions on purpose: the outcome is implementation-defined
        # (None tool_calls, or an error tool call). The test only requires that
        # parse() returns a two-item result without raising.
        _content, _tool_calls = parser.parse('<tool_call>not valid json</tool_call>')

    def test_truncated_tool_call(self, parser):
        """An unclosed tool_call tag (generation cut off mid-call) must not raise."""
        truncated = '<tool_call>{"name": "terminal", "arguments": {"command": "ls -la"}'
        # Either a successful parse or None tool_calls is acceptable here;
        # only the two-item result shape is checked, via the unpacking.
        _content, _tool_calls = parser.parse(truncated)
# Either parse it successfully or return None
|
||||
|
||||
|
||||
# ─── Parse result contract tests (applies to ALL parsers) ───────────────
|
||||
|
||||
class TestParseResultContract:
    """Contract every parser's parse() result is expected to satisfy."""

    @pytest.fixture(params=["hermes"])  # Extend with more parser names as needed
    def parser(self, request):
        return get_parser(request.param)

    def test_returns_tuple_of_two(self, parser):
        outcome = parser.parse("hello world")
        assert isinstance(outcome, tuple)
        assert len(outcome) == 2

    def test_no_tools_returns_none_tool_calls(self, parser):
        content, tool_calls = parser.parse("Just plain text, no tools.")
        assert tool_calls is None
        assert content is not None

    def test_tool_calls_are_proper_objects(self, parser):
        """Any tool calls returned expose id, function.name and function.arguments."""
        # The input is in hermes format, which the original test treats as universal.
        raw = '<tool_call>{"name": "terminal", "arguments": {"command": "echo hi"}}</tool_call>'
        _content, tool_calls = parser.parse(raw)

        # A parser that reports no tool calls has nothing further to check.
        if tool_calls is None:
            return

        for call in tool_calls:
            assert hasattr(call, "id")
            assert hasattr(call, "function")
            assert hasattr(call.function, "name")
            assert hasattr(call.function, "arguments")
            assert call.id is not None
            assert isinstance(call.function.name, str)
            assert isinstance(call.function.arguments, str)
|
||||
|
||||
|
||||
# ─── DeepSeek V3 parser tests ───────────────────────────────────────────
|
||||
|
||||
class TestDeepSeekV3Parser:
    """Behaviour of the "deepseek_v3" parser on DeepSeek-style tool-call markup."""

    @pytest.fixture
    def parser(self):
        return get_parser("deepseek_v3")

    def test_no_tool_call(self, parser):
        plain = "Hello, how can I help you?"
        content, tool_calls = parser.parse(plain)
        # Text without tool-call markup comes back unchanged.
        assert content == plain
        assert tool_calls is None

    def test_single_tool_call(self, parser):
        raw = (
            '<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>get_weather\n'
            '```json\n{"city": "London"}\n```<｜tool▁call▁end｜><｜tool▁calls▁end｜>'
        )
        _, tool_calls = parser.parse(raw)
        assert tool_calls is not None
        assert len(tool_calls) == 1

        call = tool_calls[0]
        assert call.function.name == "get_weather"
        # Arguments are a JSON string; decode before checking the value.
        assert json.loads(call.function.arguments)["city"] == "London"

    def test_multiple_tool_calls(self, parser):
        raw = (
            '<｜tool▁calls▁begin｜>'
            '<｜tool▁call▁begin｜>function<｜tool▁sep｜>get_weather\n'
            '```json\n{"city": "London"}\n```<｜tool▁call▁end｜>'
            '<｜tool▁call▁begin｜>function<｜tool▁sep｜>get_time\n'
            '```json\n{"timezone": "UTC"}\n```<｜tool▁call▁end｜>'
            '<｜tool▁calls▁end｜>'
        )
        _, tool_calls = parser.parse(raw)
        assert tool_calls is not None
        assert len(tool_calls) == 2, f"Expected 2 tool calls, got {len(tool_calls)}"

        seen = [call.function.name for call in tool_calls]
        for expected in ("get_weather", "get_time"):
            assert expected in seen

    def test_tool_call_with_preceding_text(self, parser):
        raw = (
            'Let me check that for you.\n'
            '<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>terminal\n'
            '```json\n{"command": "ls"}\n```<｜tool▁call▁end｜><｜tool▁calls▁end｜>'
        )
        _, tool_calls = parser.parse(raw)
        # Leading prose must not prevent the tool call from being extracted.
        assert tool_calls is not None
        assert len(tool_calls) == 1
|
||||
|
||||
|
||||
# ─── Mistral parser tests ───────────────────────────────────────────────
|
||||
|
||||
class TestMistralParser:
    """Behaviour of the "mistral" parser on both [TOOL_CALLS] layouts exercised below."""

    @pytest.fixture
    def parser(self):
        return get_parser("mistral")

    def test_no_tool_call(self, parser):
        plain = "Hello, how can I help you?"
        content, tool_calls = parser.parse(plain)
        # Text without a [TOOL_CALLS] marker comes back unchanged.
        assert content == plain
        assert tool_calls is None

    def test_pre_v11_single_tool_call(self, parser):
        # Pre-v11 layout: marker followed by a JSON array of call objects.
        _, tool_calls = parser.parse(
            '[TOOL_CALLS] [{"name": "func", "arguments": {"key": "val"}}]'
        )
        assert tool_calls is not None
        assert len(tool_calls) == 1

        call = tool_calls[0]
        assert call.function.name == "func"
        assert json.loads(call.function.arguments)["key"] == "val"

    def test_pre_v11_nested_json(self, parser):
        _, tool_calls = parser.parse(
            '[TOOL_CALLS] [{"name": "func", "arguments": {"nested": {"deep": true}}}]'
        )
        assert tool_calls is not None
        assert len(tool_calls) == 1

        call = tool_calls[0]
        assert call.function.name == "func"
        # Nested objects must survive the round trip through the arguments string.
        decoded = json.loads(call.function.arguments)
        assert decoded["nested"]["deep"] is True

    def test_v11_single_tool_call(self, parser):
        # v11 layout: marker immediately followed by the name and a JSON object.
        _, tool_calls = parser.parse('[TOOL_CALLS]get_weather{"city": "London"}')
        assert tool_calls is not None
        assert len(tool_calls) == 1

        call = tool_calls[0]
        assert call.function.name == "get_weather"
        assert json.loads(call.function.arguments)["city"] == "London"

    def test_v11_multiple_tool_calls(self, parser):
        _, tool_calls = parser.parse('[TOOL_CALLS]func1{"a": 1}[TOOL_CALLS]func2{"b": 2}')
        assert tool_calls is not None
        assert len(tool_calls) == 2

        seen = [call.function.name for call in tool_calls]
        for expected in ("func1", "func2"):
            assert expected in seen

    def test_preceding_text_preserved(self, parser):
        content, tool_calls = parser.parse('Hello[TOOL_CALLS]func{"a": 1}')
        # Text ahead of the marker is returned as the content.
        assert content == "Hello"
        assert tool_calls is not None
        assert len(tool_calls) == 1
        assert tool_calls[0].function.name == "func"

    def test_malformed_json_fallback(self, parser):
        _, tool_calls = parser.parse("[TOOL_CALLS] not valid json")
        assert tool_calls is None
|
||||
Loading…
Add table
Add a link
Reference in a new issue