diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index 444ad6525ea..70177dbb3c0 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -478,6 +478,16 @@ def _infer_provider_from_url(base_url: str) -> Optional[str]:
     return None
 
 
+def _lmstudio_server_root(base_url: str) -> str:
+    """Reduce an LM Studio base URL to the bare server root (no API suffix)."""
+    trimmed = _normalize_base_url(base_url).rstrip("/")
+    # Longest suffix first: a URL ending in ``/api/v1`` also ends in ``/v1``.
+    known_suffixes = ("/api/v1", "/api", "/v1")
+    hit = next((sfx for sfx in known_suffixes if trimmed.endswith(sfx)), "")
+    # An empty ``hit`` leaves ``trimmed`` untouched (already slash-stripped).
+    return trimmed[: len(trimmed) - len(hit)].rstrip("/")
+
+
 def _is_known_provider_base_url(base_url: str) -> bool:
     return _infer_provider_from_url(base_url) is not None
 
@@ -549,6 +559,7 @@ def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
     server_url = normalized
     if server_url.endswith("/v1"):
         server_url = server_url[:-3]
+    lmstudio_url = _lmstudio_server_root(base_url)
 
     headers = _auth_headers(api_key)
 
@@ -556,7 +567,7 @@ def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
         with httpx.Client(timeout=2.0, headers=headers) as client:
             # LM Studio exposes /api/v1/models — check first (most specific)
             try:
-                r = client.get(f"{server_url}/api/v1/models")
+                r = client.get(f"{lmstudio_url}/api/v1/models")
                 if r.status_code == 200:
                     return "lm-studio"
             except Exception:
@@ -774,7 +785,7 @@ def fetch_endpoint_model_metadata(
     if is_local_endpoint(normalized):
         try:
             if detect_local_server_type(normalized, api_key=api_key) == "lm-studio":
-                server_url = normalized[:-3].rstrip("/") if normalized.endswith("/v1") else normalized
+                server_url = _lmstudio_server_root(normalized)
                 response = requests.get(
                     server_url.rstrip("/") + "/api/v1/models",
                     headers=headers,
@@ -1297,6 +1308,7 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
     server_url = base_url.rstrip("/")
     if server_url.endswith("/v1"):
         server_url = server_url[:-3]
+    lmstudio_url = _lmstudio_server_root(base_url)
 
     headers = _auth_headers(api_key)
 
@@ -1340,7 +1352,7 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
             # Use _model_id_matches for fuzzy matching: LM Studio stores models as
             # "publisher/slug" but users configure only "slug" after "local:" prefix.
             if server_type == "lm-studio":
-                resp = client.get(f"{server_url}/api/v1/models")
+                resp = client.get(f"{lmstudio_url}/api/v1/models")
                 if resp.status_code == 200:
                     data = resp.json()
                     for m in data.get("models", []):
diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py
index 1548f4a3834..02a0d3eec90 100644
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -704,6 +704,22 @@ def _resolve_zai_base_url(api_key: str, default_url: str, env_override: str) ->
     return default_url
 
 
+def _normalize_lmstudio_runtime_base_url(base_url: str) -> str:
+    """Coerce an LM Studio URL to its OpenAI-compatible ``.../v1`` form.
+
+    The native management API is served under ``/api/v1`` and the chat
+    endpoint under ``/v1``. One trailing ``/api/v1``, ``/api`` or ``/v1`` is
+    removed and ``/v1`` re-appended; an empty result falls back to
+    ``http://127.0.0.1:1234``.
+    """
+    cleaned = str(base_url or "").strip().rstrip("/")
+    # Longest suffix first: a URL ending in ``/api/v1`` also ends in ``/v1``.
+    hit = next((s for s in ("/api/v1", "/api", "/v1") if cleaned.endswith(s)), "")
+    server_root = cleaned[: len(cleaned) - len(hit)].rstrip("/")
+    host = server_root if server_root else "http://127.0.0.1:1234"
+    return f"{host}/v1"
+
+
 # =============================================================================
 # Error Types
 # =============================================================================
@@ -6341,6 +6357,9 @@ def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]:
     else:
         base_url = pconfig.inference_base_url
 
+    if provider_id == "lmstudio":
+        base_url = _normalize_lmstudio_runtime_base_url(base_url)
+
     return {
         "provider": provider_id,
         "api_key": api_key,
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index 38e7c80270a..cf3eb40edaa 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -2902,13 +2902,19 @@ def _is_github_models_base_url(base_url: Optional[str]) -> bool:
 
 
 def _lmstudio_server_root(base_url: Optional[str]) -> Optional[str]:
-    """Strip ``/v1`` suffix from an LM Studio base URL to get the native API root.
+    """Return the LM Studio server root for native ``/api/v1`` endpoints.
 
+    Accepts the OpenAI-compatible runtime URL (``.../v1``) as well as the
+    native API prefix (``.../api`` or ``.../api/v1``). Native probes append
+    ``/api/v1/...`` themselves, so one trailing suffix is stripped (longest
+    match first) to avoid requests such as ``/api/api/v1/models``.
     Returns ``None`` when the base URL is empty/invalid.
     """
     root = (base_url or "").strip().rstrip("/")
-    if root.endswith("/v1"):
-        root = root[:-3].rstrip("/")
+    for suffix in ("/api/v1", "/api", "/v1"):
+        if root.endswith(suffix):
+            root = root[: -len(suffix)].rstrip("/")
+            break
     return root or None
 
 
diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py
index 50a826ebbae..a30bdcc3a17 100644
--- a/hermes_cli/runtime_provider.py
+++ b/hermes_cli/runtime_provider.py
@@ -463,6 +463,9 @@ def _resolve_runtime_from_pool_entry(
         provider=provider, api_mode=api_mode, model_cfg=model_cfg
     )
 
+    if provider == "lmstudio":
+        base_url = auth_mod._normalize_lmstudio_runtime_base_url(base_url)
+
     return {
         "provider": provider,
         "api_mode": api_mode,
@@ -1914,6 +1917,8 @@ def resolve_runtime_provider(
         # Strip trailing /v1 for OpenCode Anthropic models (see comment above).
         if api_mode == "anthropic_messages" and provider in {"opencode-zen", "opencode-go"}:
             base_url = re.sub(r"/v1/?$", "", base_url)
+        if provider == "lmstudio":
+            base_url = auth_mod._normalize_lmstudio_runtime_base_url(base_url)
         return {
             "provider": provider,
             "api_mode": api_mode,
diff --git a/tests/agent/test_model_metadata_local_ctx.py b/tests/agent/test_model_metadata_local_ctx.py
index ca1c5d3f94a..9b0268bda0f 100644
--- a/tests/agent/test_model_metadata_local_ctx.py
+++ b/tests/agent/test_model_metadata_local_ctx.py
@@ -424,6 +424,31 @@ class TestQueryLocalContextLengthLmStudio:
             "max_context_length (1048576) must not win over loaded_instances."
         )
 
+    def test_lmstudio_native_api_base_url_is_not_doubled(self):
+        """``/api/v1`` base URLs must hit ``/api/v1/models``, not a doubled path."""
+        from agent.model_metadata import _query_local_context_length
+
+        loaded_model = {
+            "key": "publisher/model-a",
+            "id": "publisher/model-a",
+            "loaded_instances": [{"config": {"context_length": 32768}}],
+        }
+        # 200 with the native model list first, then two 404 responses,
+        # handed to the shared ``_make_client`` helper in that order.
+        responses = [
+            self._make_resp(200, {"models": [loaded_model]}),
+            self._make_resp(404, {}),
+            self._make_resp(404, {}),
+        ]
+        fake_client = self._make_client(*responses)
+
+        with patch("agent.model_metadata.detect_local_server_type", return_value="lm-studio"), \
+             patch("httpx.Client", return_value=fake_client):
+            ctx_len = _query_local_context_length("publisher/model-a", "http://localhost:1234/api/v1")
+
+        assert ctx_len == 32768
+        assert fake_client.get.call_args_list[0].args[0] == "http://localhost:1234/api/v1/models"
+
 
 class TestDetectLocalServerTypeAuth:
     def test_passes_bearer_token_to_probe_requests(self):
@@ -445,6 +470,24 @@ class TestDetectLocalServerTypeAuth:
             "Authorization": "Bearer lm-token"
         }
 
+    def test_native_api_base_url_is_not_doubled(self):
+        """A ``/api/v1`` base URL is probed at ``/api/v1/models``, not a doubled path."""
+        from agent.model_metadata import detect_local_server_type
+
+        # Every GET answers 200; the test pins the URL of the first probe.
+        ok_response = MagicMock(status_code=200)
+
+        fake_client = MagicMock()
+        fake_client.__enter__.return_value = fake_client
+        fake_client.__exit__.return_value = False
+        fake_client.get.return_value = ok_response
+
+        with patch("httpx.Client", return_value=fake_client):
+            detected = detect_local_server_type("http://localhost:1234/api/v1")
+
+        assert detected == "lm-studio"
+        assert fake_client.get.call_args_list[0].args[0] == "http://localhost:1234/api/v1/models"
+
 
 class TestFetchEndpointModelMetadataLmStudio:
     """fetch_endpoint_model_metadata should use LM Studio's native models endpoint."""
@@ -489,6 +532,33 @@ class TestFetchEndpointModelMetadataLmStudio:
         assert result["lmstudio-community/Qwen3.5-27B-GGUF/Qwen3.5-27B-Q8_0.gguf"]["context_length"] == 131072
         assert result["Qwen3.5-27B-GGUF/Qwen3.5-27B-Q8_0.gguf"]["context_length"] == 131072
 
+    def test_native_api_base_url_is_not_doubled(self):
+        """``/api/v1`` base URLs are fetched from ``/api/v1/models``, not a doubled path."""
+        from agent.model_metadata import fetch_endpoint_model_metadata
+
+        model_entry = {
+            "key": "publisher/model-a",
+            "id": "publisher/model-a",
+            "loaded_instances": [
+                {"config": {"context_length": 65536}}
+            ],
+        }
+        listing = self._make_resp({"models": [model_entry]})
+
+        detect_patch = patch(
+            "agent.model_metadata.detect_local_server_type",
+            return_value="lm-studio",
+        )
+        get_patch = patch("agent.model_metadata.requests.get", return_value=listing)
+        with detect_patch, get_patch as mock_get:
+            metadata = fetch_endpoint_model_metadata(
+                "http://localhost:1234/api/v1",
+                force_refresh=True,
+            )
+
+        assert mock_get.call_args[0][0] == "http://localhost:1234/api/v1/models"
+        assert metadata["publisher/model-a"]["context_length"] == 65536
+
 
 class TestQueryLocalContextLengthNetworkError:
     """_query_local_context_length handles network failures gracefully."""
diff --git a/tests/hermes_cli/test_api_key_providers.py b/tests/hermes_cli/test_api_key_providers.py
index 6dacd5e353b..ad864f8cd9d 100644
--- a/tests/hermes_cli/test_api_key_providers.py
+++ b/tests/hermes_cli/test_api_key_providers.py
@@ -427,6 +427,15 @@ class TestResolveApiKeyProviderCredentials:
         assert creds["api_key"] == "lm-token"
         assert creds["base_url"] == "http://lmstudio.remote:4321/v1"
 
+    def test_resolve_lmstudio_normalizes_native_api_base_url_from_env(self, monkeypatch):
+        """``LM_BASE_URL`` given as ``.../api/v1`` resolves to the ``.../v1`` runtime URL."""
+        monkeypatch.setenv("LM_API_KEY", "lm-token")
+        monkeypatch.setenv("LM_BASE_URL", "http://lmstudio.remote:4321/api/v1")
+
+        resolved = resolve_api_key_provider_credentials("lmstudio")
+        assert resolved["provider"] == "lmstudio"
+        assert resolved["base_url"] == "http://lmstudio.remote:4321/v1"
+
     def test_resolve_lmstudio_no_api_key_substitutes_placeholder(self, monkeypatch):
         # No-auth LM Studio: when LM_API_KEY isn't set, runtime credentials
         # carry a placeholder so gateway/TUI/cron paths see the local server
diff --git a/tests/hermes_cli/test_model_validation.py b/tests/hermes_cli/test_model_validation.py
index f5d356055c3..ca621f915d0 100644
--- a/tests/hermes_cli/test_model_validation.py
+++ b/tests/hermes_cli/test_model_validation.py
@@ -677,6 +677,19 @@ class TestValidateApiFallback:
 
         assert models == ["publisher/chat-model"]
 
+    def test_fetch_lmstudio_models_normalizes_native_api_base_url(self):
+        """A ``/api/v1`` base URL is requested as ``/api/v1/models`` (no doubled prefix)."""
+        fake_resp = MagicMock()
+        fake_resp.__enter__.return_value = fake_resp
+        fake_resp.__exit__.return_value = False
+        fake_resp.read.return_value = b'{"models":[{"key":"publisher/chat-model","type":"llm"}]}'
+        with patch("hermes_cli.models.urllib.request.urlopen", return_value=fake_resp) as fake_urlopen:
+            listed = fetch_lmstudio_models(base_url="http://localhost:1234/api/v1")
+
+        sent_request = fake_urlopen.call_args[0][0]
+        assert sent_request.full_url == "http://localhost:1234/api/v1/models"
+        assert listed == ["publisher/chat-model"]
+
     def test_validate_lmstudio_rejects_embedding_models(self):
         mock_resp = MagicMock()
         mock_resp.__enter__.return_value = mock_resp
diff --git a/tests/hermes_cli/test_runtime_provider_resolution.py b/tests/hermes_cli/test_runtime_provider_resolution.py
index 8e64223a3cd..de32869c4a5 100644
--- a/tests/hermes_cli/test_runtime_provider_resolution.py
+++ b/tests/hermes_cli/test_runtime_provider_resolution.py
@@ -416,6 +416,32 @@ def test_resolve_runtime_provider_lmstudio_saved_base_url_wins_over_env(monkeypa
     assert resolved["api_key"] == "dummy-lm-api-key"
 
 
+def test_resolve_runtime_provider_lmstudio_normalizes_native_api_saved_base_url(monkeypatch):
+    """A saved ``.../api/v1`` base URL resolves to the ``.../v1`` runtime URL."""
+    for env_name in ("LM_API_KEY", "LM_BASE_URL"):
+        monkeypatch.delenv(env_name, raising=False)
+
+    saved_model_cfg = {
+        "provider": "lmstudio",
+        "base_url": "http://192.168.1.10:1234/api/v1",
+        "default": "qwen/qwen3-coder-30b",
+    }
+
+    def _pool_without_credentials(provider):
+        # Fresh stub pool whose ``has_credentials()`` is always False.
+        return type("Pool", (), {"has_credentials": lambda self: False})()
+
+    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "lmstudio")
+    monkeypatch.setattr(rp, "_get_model_config", lambda: dict(saved_model_cfg))
+    monkeypatch.setattr(rp, "load_pool", _pool_without_credentials)
+
+    runtime = rp.resolve_runtime_provider(requested="lmstudio")
+
+    assert runtime["provider"] == "lmstudio"
+    assert runtime["api_mode"] == "chat_completions"
+    assert runtime["base_url"] == "http://192.168.1.10:1234/v1"
+
+
 def test_resolve_runtime_provider_openrouter_explicit(monkeypatch):
     monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
     monkeypatch.setattr(rp, "_get_model_config", lambda: {})