fix(cli): hint about /v1 suffix when configuring local model endpoints

When a user enters a local model server URL (Ollama, vLLM, llama.cpp)
without a /v1 suffix during 'hermes model' custom endpoint setup,
prompt them to add it. Most OpenAI-compatible local servers require
/v1 in the base URL for chat completions to work.
This commit is contained in:
Teknium 2026-04-15 22:36:35 -07:00 committed by Teknium
parent 8798b069d3
commit 5c397876b9
2 changed files with 22 additions and 1 deletion

View file

@@ -1568,6 +1568,27 @@ def _model_flow_custom(config):
effective_key = api_key or current_key
# Hint: most local model servers (Ollama, vLLM, llama.cpp) require /v1
# in the base URL for OpenAI-compatible chat completions. Prompt the
# user if the URL looks like a local server without /v1.
_url_lower = effective_url.rstrip("/").lower()
_looks_local = any(h in _url_lower for h in ("localhost", "127.0.0.1", "0.0.0.0", ":11434", ":8080", ":5000"))
if _looks_local and not _url_lower.endswith("/v1"):
print()
print(f" Hint: Did you mean to add /v1 at the end?")
print(f" Most local model servers (Ollama, vLLM, llama.cpp) require it.")
print(f" e.g. {effective_url.rstrip('/')}/v1")
try:
_add_v1 = input(" Add /v1? [Y/n]: ").strip().lower()
except (KeyboardInterrupt, EOFError):
_add_v1 = "n"
if _add_v1 in ("", "y", "yes"):
effective_url = effective_url.rstrip("/") + "/v1"
if base_url:
base_url = effective_url
print(f" Updated URL: {effective_url}")
print()
from hermes_cli.models import probe_api_models
probe = probe_api_models(effective_key, effective_url)