diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 9d0615d53..f7b95ff38 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -1568,6 +1568,27 @@ def _model_flow_custom(config):
 
 
     effective_key = api_key or current_key
+    # Hint: most local model servers (Ollama, vLLM, llama.cpp) require /v1
+    # in the base URL for OpenAI-compatible chat completions. Prompt the
+    # user if the URL looks like a local server without /v1.
+    _url_lower = effective_url.rstrip("/").lower()
+    _looks_local = any(h in _url_lower for h in ("localhost", "127.0.0.1", "0.0.0.0", ":11434", ":8080", ":5000"))
+    if _looks_local and not _url_lower.endswith("/v1"):
+        print()
+        # No placeholders in these two messages, so plain strings (not f-strings).
+        print(" Hint: Did you mean to add /v1 at the end?")
+        print(" Most local model servers (Ollama, vLLM, llama.cpp) require it.")
+        print(f" e.g. {effective_url.rstrip('/')}/v1")
+        try:
+            _add_v1 = input(" Add /v1? [Y/n]: ").strip().lower()
+        except (KeyboardInterrupt, EOFError):
+            _add_v1 = "n"
+        if _add_v1 in ("", "y", "yes"):
+            effective_url = effective_url.rstrip("/") + "/v1"
+            if base_url:
+                base_url = effective_url
+            print(f" Updated URL: {effective_url}")
+        print()
+
 
     from hermes_cli.models import probe_api_models
     probe = probe_api_models(effective_key, effective_url)
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index ee67f15b0..49ef1dc8f 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -984,7 +984,7 @@ class TestBuildApiKwargs:
         messages = [{"role": "system", "content": "sys"}, {"role": "user", "content": "hi"}]
         kwargs = agent._build_api_kwargs(messages)
         assert kwargs["max_tokens"] == 65536
-=======
+
     def test_ollama_think_false_on_effort_none(self, agent):
         """Custom (Ollama) provider with effort=none should inject think=false."""
         agent.provider = "custom"