diff --git a/environments/benchmarks/tblite/local_vllm.yaml b/environments/benchmarks/tblite/local_vllm.yaml index b6574a6bd..17689ba1d 100644 --- a/environments/benchmarks/tblite/local_vllm.yaml +++ b/environments/benchmarks/tblite/local_vllm.yaml @@ -4,7 +4,7 @@ # # Start the vLLM server from the atropos directory: # python -m example_trainer.vllm_api_server \ -# --model Qwen/Qwen3-4B-Thinking-2507 \ +# --model Qwen/Qwen3-4B-Instruct-2507 \ # --port 9001 \ # --gpu-memory-utilization 0.8 \ # --max-model-len=32000 @@ -26,14 +26,15 @@ env: task_timeout: 1200 eval_concurrency: 8 tool_call_parser: "hermes" - tokenizer_name: "Qwen/Qwen3-4B-Thinking-2507" + system_prompt: "You are an expert terminal agent. You MUST use the provided tools to complete tasks. Use the terminal tool to run shell commands, read_file to read files, write_file to write files, search_files to search, and patch to edit files. Do NOT write out solutions as text - execute them using the tools. Always start by exploring the environment with terminal commands." + tokenizer_name: "Qwen/Qwen3-4B-Instruct-2507" use_wandb: false - wandb_name: "tblite-qwen3-4b-thinking" + wandb_name: "tblite-qwen3-4b-instruct" ensure_scores_are_not_same: false data_dir_to_save_evals: "environments/benchmarks/evals/tblite-qwen3-4b-local" openai: base_url: "http://localhost:9001" - model_name: "Qwen/Qwen3-4B-Thinking-2507" + model_name: "Qwen/Qwen3-4B-Instruct-2507" server_type: "vllm" health_check: false