perf(doctor): parallelize API connectivity checks and disable IMDS (#22766)

`hermes doctor` ran every connectivity probe sequentially and on a typical
developer laptop spent ~2s of its ~5s wall time inside boto3's EC2
instance-metadata-service lookup (169.254.169.254) — the default
AWS credential chain probes IMDS even when AWS_BEARER_TOKEN_BEDROCK
or AWS_ACCESS_KEY_ID is the only legitimate source.

Refactor the API Connectivity section so every probe (OpenRouter,
Anthropic, ~16 static API-key providers + dynamic profiles, AWS
Bedrock) is a pure function returning a structured result, then
fan them out through a ThreadPoolExecutor(max_workers=8). Output
order, glyphs, colours, padding, and issue strings stay byte-for-byte
identical to the sequential implementation; results are gathered
in submission order.

Also disable IMDS for the parallel block by setting
AWS_EC2_METADATA_DISABLED=true on the parent thread before submitting
work (and restoring its prior value in a finally block). Bedrock's
real-API call gets a Config(connect_timeout=5, read_timeout=10,
retries={max_attempts:1}) so a transient regional failure can't pad
the run by 30+ seconds.

Measured impact (5-run medians, 9950X3D):
  hermes doctor:           5.07 → 2.16 s  (-57%)

Doctor tests: 48 passed (test_doctor.py + test_doctor_command_install.py).

The remaining ~2s of wall is import overhead + a couple of one-off
network calls outside the API Connectivity section (`fetch_models_dev`
provider catalog refresh, Nous OAuth refresh in `Auth Providers`).
Those are next-tier targets, not part of this change.
This commit is contained in:
Teknium 2026-05-09 13:03:20 -07:00 committed by GitHub
parent 8f711f79a4
commit e612c3d6f0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1166,44 +1166,92 @@ def run_doctor(args):
# ========================================================================= # =========================================================================
print() print()
print(color("◆ API Connectivity", Colors.CYAN, Colors.BOLD)) print(color("◆ API Connectivity", Colors.CYAN, Colors.BOLD))
openrouter_key = os.getenv("OPENROUTER_API_KEY") # Refactor: every connectivity probe below is HTTP-bound and fully
if openrouter_key: # independent. Running them in series spent ~5s wall on a typical
print(" Checking OpenRouter API...", end="", flush=True) # workstation (2s of that was boto3's IMDS lookup for AWS credentials,
# which times out unless you're actually on EC2). Threading them with
# a small executor pool collapses the section to roughly the slowest
# single probe — about 2s — without changing the output format.
#
# Each ``_probe_*`` helper is a pure function: takes its inputs,
# makes one HTTP/SDK call, returns a ``_ConnectivityResult`` carrying
# the line(s) to print and any issue strings to append. No globals,
# no shared mutable state, no printing inside the workers.
import concurrent.futures as _futures
from collections import namedtuple as _namedtuple
_ConnectivityResult = _namedtuple(
"_ConnectivityResult", ["label", "lines", "issues"]
)
_probes: list = [] # list of (label, callable) submitted in display order
def _probe_openrouter() -> _ConnectivityResult:
key = os.getenv("OPENROUTER_API_KEY")
if not key:
return _ConnectivityResult(
"OpenRouter API",
[(color("", Colors.YELLOW), "OpenRouter API",
color("(not configured)", Colors.DIM))],
[],
)
try: try:
import httpx import httpx
response = httpx.get( r = httpx.get(
OPENROUTER_MODELS_URL, OPENROUTER_MODELS_URL,
headers={"Authorization": f"Bearer {openrouter_key}"}, headers={"Authorization": f"Bearer {key}"},
timeout=10 timeout=10,
) )
if response.status_code == 200: if r.status_code == 200:
print(f"\r {color('', Colors.GREEN)} OpenRouter API ") return _ConnectivityResult(
elif response.status_code == 401: "OpenRouter API",
print(f"\r {color('', Colors.RED)} OpenRouter API {color('(invalid API key)', Colors.DIM)} ") [(color("", Colors.GREEN), "OpenRouter API", "")],
issues.append("Check OPENROUTER_API_KEY in .env") [],
elif response.status_code == 402:
print(f"\r {color('', Colors.RED)} OpenRouter API {color('(out of credits — payment required)', Colors.DIM)}")
issues.append(
"OpenRouter account has insufficient credits. "
"Fix: run 'hermes config set model.provider <provider>' to switch providers, "
"or fund your OpenRouter account at https://openrouter.ai/settings/credits"
) )
elif response.status_code == 429: if r.status_code == 401:
print(f"\r {color('', Colors.RED)} OpenRouter API {color('(rate limited)', Colors.DIM)} ") return _ConnectivityResult(
issues.append("OpenRouter rate limit hit — consider switching to a different provider or waiting") "OpenRouter API",
else: [(color("", Colors.RED), "OpenRouter API",
print(f"\r {color('', Colors.RED)} OpenRouter API {color(f'(HTTP {response.status_code})', Colors.DIM)} ") color("(invalid API key)", Colors.DIM))],
["Check OPENROUTER_API_KEY in .env"],
)
if r.status_code == 402:
return _ConnectivityResult(
"OpenRouter API",
[(color("", Colors.RED), "OpenRouter API",
color("(out of credits — payment required)", Colors.DIM))],
["OpenRouter account has insufficient credits. "
"Fix: run 'hermes config set model.provider <provider>' "
"to switch providers, or fund your OpenRouter account "
"at https://openrouter.ai/settings/credits"],
)
if r.status_code == 429:
return _ConnectivityResult(
"OpenRouter API",
[(color("", Colors.RED), "OpenRouter API",
color("(rate limited)", Colors.DIM))],
["OpenRouter rate limit hit — consider switching to "
"a different provider or waiting"],
)
return _ConnectivityResult(
"OpenRouter API",
[(color("", Colors.RED), "OpenRouter API",
color(f"(HTTP {r.status_code})", Colors.DIM))],
[],
)
except Exception as e: except Exception as e:
print(f"\r {color('', Colors.RED)} OpenRouter API {color(f'({e})', Colors.DIM)} ") return _ConnectivityResult(
issues.append("Check network connectivity") "OpenRouter API",
else: [(color("", Colors.RED), "OpenRouter API",
check_warn("OpenRouter API", "(not configured)") color(f"({e})", Colors.DIM))],
["Check network connectivity"],
from hermes_cli.auth import get_anthropic_key )
anthropic_key = get_anthropic_key()
if anthropic_key: def _probe_anthropic() -> _ConnectivityResult:
print(" Checking Anthropic API...", end="", flush=True) from hermes_cli.auth import get_anthropic_key
key = get_anthropic_key()
if not key:
return _ConnectivityResult("Anthropic API", [], [])
try: try:
import httpx import httpx
from agent.anthropic_adapter import ( from agent.anthropic_adapter import (
@ -1212,140 +1260,247 @@ def run_doctor(args):
_OAUTH_ONLY_BETAS, _OAUTH_ONLY_BETAS,
_CONTEXT_1M_BETA, _CONTEXT_1M_BETA,
) )
headers = {"anthropic-version": "2023-06-01"} headers = {"anthropic-version": "2023-06-01"}
is_oauth = _is_oauth_token(anthropic_key) is_oauth = _is_oauth_token(key)
if is_oauth: if is_oauth:
headers["Authorization"] = f"Bearer {anthropic_key}" headers["Authorization"] = f"Bearer {key}"
headers["anthropic-beta"] = ",".join(_COMMON_BETAS + _OAUTH_ONLY_BETAS) headers["anthropic-beta"] = ",".join(_COMMON_BETAS + _OAUTH_ONLY_BETAS)
else: else:
headers["x-api-key"] = anthropic_key headers["x-api-key"] = key
response = httpx.get( r = httpx.get(
"https://api.anthropic.com/v1/models", "https://api.anthropic.com/v1/models",
headers=headers, headers=headers, timeout=10,
timeout=10
) )
# Reactive recovery: OAuth subscriptions that don't include 1M # Reactive recovery: OAuth subscriptions without 1M context reject the
# context reject the request with 400 "long context beta is not # request with 400 "long context beta is not yet available for this
# yet available for this subscription". Retry once with that # subscription". Retry once with that beta stripped so the doctor
# beta stripped so the doctor check doesn't falsely report the # check doesn't falsely report Anthropic as unreachable.
# Anthropic API as unreachable for those users.
if ( if (
is_oauth is_oauth
and response.status_code == 400 and r.status_code == 400
and "long context beta" in response.text.lower() and "long context beta" in r.text.lower()
and "not yet available" in response.text.lower() and "not yet available" in r.text.lower()
): ):
headers["anthropic-beta"] = ",".join( headers["anthropic-beta"] = ",".join(
[b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA] + list(_OAUTH_ONLY_BETAS) [b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA]
+ list(_OAUTH_ONLY_BETAS)
) )
response = httpx.get( r = httpx.get(
"https://api.anthropic.com/v1/models", "https://api.anthropic.com/v1/models",
headers=headers, headers=headers, timeout=10,
timeout=10,
) )
if response.status_code == 200: if r.status_code == 200:
print(f"\r {color('', Colors.GREEN)} Anthropic API ") return _ConnectivityResult(
elif response.status_code == 401: "Anthropic API",
print(f"\r {color('', Colors.RED)} Anthropic API {color('(invalid API key)', Colors.DIM)} ") [(color("", Colors.GREEN), "Anthropic API", "")],
else: [],
msg = "(couldn't verify)" )
print(f"\r {color('', Colors.YELLOW)} Anthropic API {color(msg, Colors.DIM)} ") if r.status_code == 401:
return _ConnectivityResult(
"Anthropic API",
[(color("", Colors.RED), "Anthropic API",
color("(invalid API key)", Colors.DIM))],
[],
)
return _ConnectivityResult(
"Anthropic API",
[(color("", Colors.YELLOW), "Anthropic API",
color("(couldn't verify)", Colors.DIM))],
[],
)
except Exception as e: except Exception as e:
print(f"\r {color('', Colors.YELLOW)} Anthropic API {color(f'({e})', Colors.DIM)} ") return _ConnectivityResult(
"Anthropic API",
[(color("", Colors.YELLOW), "Anthropic API",
color(f"({e})", Colors.DIM))],
[],
)
def _probe_apikey_provider(pname, env_vars, default_url, base_env,
supports_health_check) -> _ConnectivityResult:
key = ""
for ev in env_vars:
key = os.getenv(ev, "")
if key:
break
if not key:
return _ConnectivityResult(pname, [], [])
label = pname.ljust(20)
if not supports_health_check:
return _ConnectivityResult(
pname,
[(color("", Colors.GREEN), label,
color("(key configured)", Colors.DIM))],
[],
)
try:
import httpx
base = os.getenv(base_env, "") if base_env else ""
# Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com/coding/v1
# (OpenAI-compat surface, which exposes /models for health check).
if not base and key.startswith("sk-kimi-"):
base = "https://api.kimi.com/coding/v1"
# Anthropic-compat endpoints (/anthropic, api.kimi.com/coding
# with no /v1) don't support /models. Rewrite to OpenAI-compat
# /v1 surface for health checks.
if base and base.rstrip("/").endswith("/anthropic"):
from agent.auxiliary_client import _to_openai_base_url
base = _to_openai_base_url(base)
if base_url_host_matches(base, "api.kimi.com") and base.rstrip("/").endswith("/coding"):
base = base.rstrip("/") + "/v1"
url = (base.rstrip("/") + "/models") if base else default_url
headers = {
"Authorization": f"Bearer {key}",
"User-Agent": _HERMES_USER_AGENT,
}
if base_url_host_matches(base, "api.kimi.com"):
headers["User-Agent"] = "claude-code/0.1.0"
r = httpx.get(url, headers=headers, timeout=10)
if (
pname == "Alibaba/DashScope"
and not base
and r.status_code == 401
):
r = httpx.get(
"https://dashscope.aliyuncs.com/compatible-mode/v1/models",
headers=headers, timeout=10,
)
if r.status_code == 200:
return _ConnectivityResult(
pname,
[(color("", Colors.GREEN), label, "")],
[],
)
if r.status_code == 401:
return _ConnectivityResult(
pname,
[(color("", Colors.RED), label,
color("(invalid API key)", Colors.DIM))],
[f"Check {env_vars[0]} in .env"],
)
return _ConnectivityResult(
pname,
[(color("", Colors.YELLOW), label,
color(f"(HTTP {r.status_code})", Colors.DIM))],
[],
)
except Exception as e:
return _ConnectivityResult(
pname,
[(color("", Colors.YELLOW), label,
color(f"({e})", Colors.DIM))],
[],
)
def _probe_bedrock() -> _ConnectivityResult:
try:
from agent.bedrock_adapter import (
has_aws_credentials,
resolve_aws_auth_env_var,
resolve_bedrock_region,
)
except ImportError:
return _ConnectivityResult("AWS Bedrock", [], [])
if not has_aws_credentials():
return _ConnectivityResult("AWS Bedrock", [], [])
auth_var = resolve_aws_auth_env_var()
region = resolve_bedrock_region()
label = "AWS Bedrock".ljust(20)
try:
import boto3
from botocore.config import Config as _BotoConfig
# Trim retries on the actual Bedrock API call so a transient
# failure doesn't pad the doctor run by 30+ seconds.
cfg = _BotoConfig(
connect_timeout=5,
read_timeout=10,
retries={"max_attempts": 1},
)
client = boto3.client("bedrock", region_name=region, config=cfg)
resp = client.list_foundation_models()
n = len(resp.get("modelSummaries", []))
return _ConnectivityResult(
"AWS Bedrock",
[(color("", Colors.GREEN), label,
color(f"({auth_var}, {region}, {n} models)", Colors.DIM))],
[],
)
except ImportError:
return _ConnectivityResult(
"AWS Bedrock",
[(color("", Colors.YELLOW), label,
color(f"(boto3 not installed — {sys.executable} -m pip install boto3)",
Colors.DIM))],
[f"Install boto3 for Bedrock: {sys.executable} -m pip install boto3"],
)
except Exception as e:
err_name = type(e).__name__
return _ConnectivityResult(
"AWS Bedrock",
[(color("", Colors.YELLOW), label,
color(f"({err_name}: {e})", Colors.DIM))],
[f"AWS Bedrock: {err_name} — check IAM permissions for "
f"bedrock:ListFoundationModels"],
)
# Build the probe submission list in display order
_probes.append(("OpenRouter API", _probe_openrouter))
_probes.append(("Anthropic API", _probe_anthropic))
# -- API-key providers --
# Tuple: (name, env_vars, default_url, base_env, supports_models_endpoint)
# If supports_models_endpoint is False, we skip the health check and just show "configured"
# Cached at module level after first build — profiles auto-extend it.
global _APIKEY_PROVIDERS_CACHE global _APIKEY_PROVIDERS_CACHE
if _APIKEY_PROVIDERS_CACHE is None: if _APIKEY_PROVIDERS_CACHE is None:
_APIKEY_PROVIDERS_CACHE = _build_apikey_providers_list() _APIKEY_PROVIDERS_CACHE = _build_apikey_providers_list()
_apikey_providers = _APIKEY_PROVIDERS_CACHE for _entry in _APIKEY_PROVIDERS_CACHE:
for _pname, _env_vars, _default_url, _base_env, _supports_health_check in _apikey_providers: _pname, _env_vars, _default_url, _base_env, _supports = _entry
_key = "" # Capture loop vars by binding default args — without this, all closures
for _ev in _env_vars: # would share the final iteration's values and every probe would hit
_key = os.getenv(_ev, "") # the last provider's URL.
if _key: _probes.append((_pname, lambda p=_pname, e=_env_vars, u=_default_url,
break b=_base_env, s=_supports:
if _key: _probe_apikey_provider(p, e, u, b, s)))
_label = _pname.ljust(20)
# Some providers (like MiniMax) don't support /models endpoint
if not _supports_health_check:
print(f" {color('', Colors.GREEN)} {_label} {color('(key configured)', Colors.DIM)}")
continue
print(f" Checking {_pname} API...", end="", flush=True)
try:
import httpx
_base = os.getenv(_base_env, "") if _base_env else ""
# Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com/coding/v1
# (OpenAI-compat surface, which exposes /models for health check).
if not _base and _key.startswith("sk-kimi-"):
_base = "https://api.kimi.com/coding/v1"
# Anthropic-compat endpoints (/anthropic, api.kimi.com/coding
# with no /v1) don't support /models. Rewrite to the OpenAI-compat
# /v1 surface for health checks.
if _base and _base.rstrip("/").endswith("/anthropic"):
from agent.auxiliary_client import _to_openai_base_url
_base = _to_openai_base_url(_base)
if base_url_host_matches(_base, "api.kimi.com") and _base.rstrip("/").endswith("/coding"):
_base = _base.rstrip("/") + "/v1"
_url = (_base.rstrip("/") + "/models") if _base else _default_url
_headers = {
"Authorization": f"Bearer {_key}",
"User-Agent": _HERMES_USER_AGENT,
}
if base_url_host_matches(_base, "api.kimi.com"):
_headers["User-Agent"] = "claude-code/0.1.0"
_resp = httpx.get(
_url,
headers=_headers,
timeout=10,
)
if (
_pname == "Alibaba/DashScope"
and not _base
and _resp.status_code == 401
):
_resp = httpx.get(
"https://dashscope.aliyuncs.com/compatible-mode/v1/models",
headers=_headers,
timeout=10,
)
if _resp.status_code == 200:
print(f"\r {color('', Colors.GREEN)} {_label} ")
elif _resp.status_code == 401:
print(f"\r {color('', Colors.RED)} {_label} {color('(invalid API key)', Colors.DIM)} ")
issues.append(f"Check {_env_vars[0]} in .env")
else:
print(f"\r {color('', Colors.YELLOW)} {_label} {color(f'(HTTP {_resp.status_code})', Colors.DIM)} ")
except Exception as _e:
print(f"\r {color('', Colors.YELLOW)} {_label} {color(f'({_e})', Colors.DIM)} ")
# -- AWS Bedrock -- _probes.append(("AWS Bedrock", _probe_bedrock))
# Bedrock uses the AWS SDK credential chain, not API keys.
# Print a single status line so users see something happening, then
# fan out. ``\r`` clears it once the first real result line lands.
print(f" {color(f'Running {len(_probes)} connectivity checks in parallel…', Colors.DIM)}",
end="", flush=True)
# Disable boto3's EC2 instance-metadata-service probe for the duration
# of the parallel block. boto's default credential chain tries
# 169.254.169.254 with a multi-second timeout when we're not on EC2,
# which dominated the section's wall time before this fix
# (~2s on a developer laptop, even with the rest parallelized).
# Set on the parent thread before submitting work so the env-var
# mutation never races with another worker. has_aws_credentials() in
# the bedrock probe already gates on real env-var creds, so IMDS is
# never the legitimate source for `hermes doctor`.
_imds_prev = os.environ.get("AWS_EC2_METADATA_DISABLED")
os.environ["AWS_EC2_METADATA_DISABLED"] = "true"
try: try:
from agent.bedrock_adapter import has_aws_credentials, resolve_aws_auth_env_var, resolve_bedrock_region # 8 workers is plenty — each probe is a single HTTP call plus a TLS
if has_aws_credentials(): # handshake. More than that wastes thread-startup cost and risks
_auth_var = resolve_aws_auth_env_var() # noisy output if anything ever printed from inside a worker.
_region = resolve_bedrock_region() with _futures.ThreadPoolExecutor(max_workers=8,
_label = "AWS Bedrock".ljust(20) thread_name_prefix="doctor-probe") as _ex:
print(f" Checking AWS Bedrock...", end="", flush=True) _futures_in_order = [_ex.submit(_fn) for _, _fn in _probes]
try: _results = [_f.result() for _f in _futures_in_order]
import boto3 finally:
_br_client = boto3.client("bedrock", region_name=_region) if _imds_prev is None:
_br_resp = _br_client.list_foundation_models() os.environ.pop("AWS_EC2_METADATA_DISABLED", None)
_model_count = len(_br_resp.get("modelSummaries", [])) else:
print(f"\r {color('', Colors.GREEN)} {_label} {color(f'({_auth_var}, {_region}, {_model_count} models)', Colors.DIM)} ") os.environ["AWS_EC2_METADATA_DISABLED"] = _imds_prev
except ImportError:
print(f"\r {color('', Colors.YELLOW)} {_label} {color(f'(boto3 not installed — {sys.executable} -m pip install boto3)', Colors.DIM)} ") # Clear the "Running …" line and print all results in submission order.
issues.append(f"Install boto3 for Bedrock: {sys.executable} -m pip install boto3") print("\r" + " " * 70 + "\r", end="")
except Exception as _e: for _r in _results:
_err_name = type(_e).__name__ for _glyph, _label, _detail in _r.lines:
print(f"\r {color('', Colors.YELLOW)} {_label} {color(f'({_err_name}: {_e})', Colors.DIM)} ") if _detail:
issues.append(f"AWS Bedrock: {_err_name} — check IAM permissions for bedrock:ListFoundationModels") print(f" {_glyph} {_label} {_detail}")
except ImportError: else:
pass # bedrock_adapter not available — skip silently print(f" {_glyph} {_label}")
for _issue in _r.issues:
issues.append(_issue)
# ========================================================================= # =========================================================================
# Check: Submodules # Check: Submodules