mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-27 01:11:40 +00:00
fix: use ManagedServer for vLLM in TBLite eval + local_vllm config
TBLite eval was bypassing ManagedServer and calling ServerManager directly, which uses /v1/chat/completions — not available on the atropos vllm_api_server (/generate only). Now uses _use_managed_server() to detect vLLM/SGLang backends and route through ManagedServer (Phase 2) with proper tool_parser and /generate endpoint. Falls back to Phase 1 for OpenAI endpoints. Also adds local_vllm.yaml config for running against a local vLLM server with Docker sandboxes.
This commit is contained in:
parent
93333387d6
commit
13f5459670
2 changed files with 70 additions and 11 deletions
39
environments/benchmarks/tblite/local_vllm.yaml
Normal file
39
environments/benchmarks/tblite/local_vllm.yaml
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
# OpenThoughts-TBLite Evaluation -- Local vLLM Backend
|
||||
#
|
||||
# Runs against a local vLLM server with Docker sandboxes.
|
||||
#
|
||||
# Start the vLLM server from the atropos directory:
|
||||
# python -m example_trainer.vllm_api_server \
|
||||
# --model Qwen/Qwen3-4B-Thinking-2507 \
|
||||
# --port 9001 \
|
||||
# --gpu-memory-utilization 0.8 \
|
||||
# --max-model-len=32000
|
||||
#
|
||||
# Then run:
|
||||
# python environments/benchmarks/tblite/tblite_env.py evaluate \
|
||||
# --config environments/benchmarks/tblite/local_vllm.yaml
|
||||
|
||||
env:
|
||||
enabled_toolsets: ["terminal", "file"]
|
||||
max_agent_turns: 60
|
||||
max_token_length: 16000
|
||||
agent_temperature: 0.6
|
||||
terminal_backend: "docker"
|
||||
terminal_timeout: 300
|
||||
tool_pool_size: 16
|
||||
dataset_name: "NousResearch/openthoughts-tblite"
|
||||
test_timeout: 600
|
||||
task_timeout: 1200
|
||||
eval_concurrency: 8
|
||||
tool_call_parser: "hermes"
|
||||
tokenizer_name: "Qwen/Qwen3-4B-Thinking-2507"
|
||||
use_wandb: false
|
||||
wandb_name: "tblite-qwen3-4b-thinking"
|
||||
ensure_scores_are_not_same: false
|
||||
data_dir_to_save_evals: "environments/benchmarks/evals/tblite-qwen3-4b-local"
|
||||
|
||||
openai:
|
||||
base_url: "http://localhost:9001"
|
||||
model_name: "Qwen/Qwen3-4B-Thinking-2507"
|
||||
server_type: "vllm"
|
||||
health_check: false
|
||||
Loading…
Add table
Add a link
Reference in a new issue