From e612c3d6f00624868ce3f73bb6beaacfea36337f Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sat, 9 May 2026 13:03:20 -0700 Subject: [PATCH] perf(doctor): parallelize API connectivity checks and disable IMDS (#22766) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `hermes doctor` ran every connectivity probe sequentially and on a typical developer laptop spent ~2s of its ~5s wall time inside boto3's EC2 instance-metadata-service lookup (169.254.169.254) — the default AWS credential chain probes IMDS even when AWS_BEARER_TOKEN_BEDROCK or AWS_ACCESS_KEY_ID is the only legitimate source. Refactor the API Connectivity section so every probe (OpenRouter, Anthropic, ~16 static API-key providers + dynamic profiles, AWS Bedrock) is a pure function returning a structured result, then fan them out through a ThreadPoolExecutor(max_workers=8). Output order, glyphs, colours, padding, and issue strings stay byte-for-byte identical to the sequential implementation; results are gathered in submission order. Also disable IMDS for the parallel block by setting AWS_EC2_METADATA_DISABLED=true on the parent thread before submitting work (and restoring its prior value in a finally block). Bedrock's real-API call gets a Config(connect_timeout=5, read_timeout=10, retries={max_attempts:1}) so a transient regional failure can't pad the run by 30+ seconds. Measured impact (5-run medians, 9950X3D): hermes doctor: 5.07 → 2.16 s (-57%) Doctor tests: 48 passed (test_doctor.py + test_doctor_command_install.py). The remaining ~2s of wall is import overhead + a couple of one-off network calls outside the API Connectivity section (`fetch_models_dev` provider catalog refresh, Nous OAuth refresh in `Auth Providers`). Those are next-tier targets, not part of this change. 
--- hermes_cli/doctor.py | 447 +++++++++++++++++++++++++++++-------------- 1 file changed, 301 insertions(+), 146 deletions(-) diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index 2b66318487f..7df69979cdd 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -1166,44 +1166,92 @@ def run_doctor(args): # ========================================================================= print() print(color("◆ API Connectivity", Colors.CYAN, Colors.BOLD)) - - openrouter_key = os.getenv("OPENROUTER_API_KEY") - if openrouter_key: - print(" Checking OpenRouter API...", end="", flush=True) + + # Refactor: every connectivity probe below is HTTP-bound and fully + # independent. Running them in series spent ~5s wall on a typical + # workstation (2s of that was boto3's IMDS lookup for AWS credentials, + # which times out unless you're actually on EC2). Threading them with + # a small executor pool collapses the section to roughly the slowest + # single probe — about 2s — without changing the output format. + # + # Each ``_probe_*`` helper is a pure function: takes its inputs, + # makes one HTTP/SDK call, returns a ``_ConnectivityResult`` carrying + # the line(s) to print and any issue strings to append. No globals, + # no shared mutable state, no printing inside the workers. 
+ import concurrent.futures as _futures + from collections import namedtuple as _namedtuple + + _ConnectivityResult = _namedtuple( + "_ConnectivityResult", ["label", "lines", "issues"] + ) + _probes: list = [] # list of (label, callable) submitted in display order + + def _probe_openrouter() -> _ConnectivityResult: + key = os.getenv("OPENROUTER_API_KEY") + if not key: + return _ConnectivityResult( + "OpenRouter API", + [(color("⚠", Colors.YELLOW), "OpenRouter API", + color("(not configured)", Colors.DIM))], + [], + ) try: import httpx - response = httpx.get( + r = httpx.get( OPENROUTER_MODELS_URL, - headers={"Authorization": f"Bearer {openrouter_key}"}, - timeout=10 + headers={"Authorization": f"Bearer {key}"}, + timeout=10, ) - if response.status_code == 200: - print(f"\r {color('✓', Colors.GREEN)} OpenRouter API ") - elif response.status_code == 401: - print(f"\r {color('✗', Colors.RED)} OpenRouter API {color('(invalid API key)', Colors.DIM)} ") - issues.append("Check OPENROUTER_API_KEY in .env") - elif response.status_code == 402: - print(f"\r {color('✗', Colors.RED)} OpenRouter API {color('(out of credits — payment required)', Colors.DIM)}") - issues.append( - "OpenRouter account has insufficient credits. 
" - "Fix: run 'hermes config set model.provider ' to switch providers, " - "or fund your OpenRouter account at https://openrouter.ai/settings/credits" + if r.status_code == 200: + return _ConnectivityResult( + "OpenRouter API", + [(color("✓", Colors.GREEN), "OpenRouter API", "")], + [], ) - elif response.status_code == 429: - print(f"\r {color('✗', Colors.RED)} OpenRouter API {color('(rate limited)', Colors.DIM)} ") - issues.append("OpenRouter rate limit hit — consider switching to a different provider or waiting") - else: - print(f"\r {color('✗', Colors.RED)} OpenRouter API {color(f'(HTTP {response.status_code})', Colors.DIM)} ") + if r.status_code == 401: + return _ConnectivityResult( + "OpenRouter API", + [(color("✗", Colors.RED), "OpenRouter API", + color("(invalid API key)", Colors.DIM))], + ["Check OPENROUTER_API_KEY in .env"], + ) + if r.status_code == 402: + return _ConnectivityResult( + "OpenRouter API", + [(color("✗", Colors.RED), "OpenRouter API", + color("(out of credits — payment required)", Colors.DIM))], + ["OpenRouter account has insufficient credits. 
" + "Fix: run 'hermes config set model.provider ' " + "to switch providers, or fund your OpenRouter account " + "at https://openrouter.ai/settings/credits"], + ) + if r.status_code == 429: + return _ConnectivityResult( + "OpenRouter API", + [(color("✗", Colors.RED), "OpenRouter API", + color("(rate limited)", Colors.DIM))], + ["OpenRouter rate limit hit — consider switching to " + "a different provider or waiting"], + ) + return _ConnectivityResult( + "OpenRouter API", + [(color("✗", Colors.RED), "OpenRouter API", + color(f"(HTTP {r.status_code})", Colors.DIM))], + [], + ) except Exception as e: - print(f"\r {color('✗', Colors.RED)} OpenRouter API {color(f'({e})', Colors.DIM)} ") - issues.append("Check network connectivity") - else: - check_warn("OpenRouter API", "(not configured)") - - from hermes_cli.auth import get_anthropic_key - anthropic_key = get_anthropic_key() - if anthropic_key: - print(" Checking Anthropic API...", end="", flush=True) + return _ConnectivityResult( + "OpenRouter API", + [(color("✗", Colors.RED), "OpenRouter API", + color(f"({e})", Colors.DIM))], + ["Check network connectivity"], + ) + + def _probe_anthropic() -> _ConnectivityResult: + from hermes_cli.auth import get_anthropic_key + key = get_anthropic_key() + if not key: + return _ConnectivityResult("Anthropic API", [], []) try: import httpx from agent.anthropic_adapter import ( @@ -1212,140 +1260,247 @@ def run_doctor(args): _OAUTH_ONLY_BETAS, _CONTEXT_1M_BETA, ) - headers = {"anthropic-version": "2023-06-01"} - is_oauth = _is_oauth_token(anthropic_key) + is_oauth = _is_oauth_token(key) if is_oauth: - headers["Authorization"] = f"Bearer {anthropic_key}" + headers["Authorization"] = f"Bearer {key}" headers["anthropic-beta"] = ",".join(_COMMON_BETAS + _OAUTH_ONLY_BETAS) else: - headers["x-api-key"] = anthropic_key - response = httpx.get( + headers["x-api-key"] = key + r = httpx.get( "https://api.anthropic.com/v1/models", - headers=headers, - timeout=10 + headers=headers, timeout=10, ) - # 
Reactive recovery: OAuth subscriptions that don't include 1M - # context reject the request with 400 "long context beta is not - # yet available for this subscription". Retry once with that - # beta stripped so the doctor check doesn't falsely report the - # Anthropic API as unreachable for those users. + # Reactive recovery: OAuth subscriptions without 1M context reject the + # request with 400 "long context beta is not yet available for this + # subscription". Retry once with that beta stripped so the doctor + # check doesn't falsely report Anthropic as unreachable. if ( is_oauth - and response.status_code == 400 - and "long context beta" in response.text.lower() - and "not yet available" in response.text.lower() + and r.status_code == 400 + and "long context beta" in r.text.lower() + and "not yet available" in r.text.lower() ): headers["anthropic-beta"] = ",".join( - [b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA] + list(_OAUTH_ONLY_BETAS) + [b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA] + + list(_OAUTH_ONLY_BETAS) ) - response = httpx.get( + r = httpx.get( "https://api.anthropic.com/v1/models", - headers=headers, - timeout=10, + headers=headers, timeout=10, ) - if response.status_code == 200: - print(f"\r {color('✓', Colors.GREEN)} Anthropic API ") - elif response.status_code == 401: - print(f"\r {color('✗', Colors.RED)} Anthropic API {color('(invalid API key)', Colors.DIM)} ") - else: - msg = "(couldn't verify)" - print(f"\r {color('⚠', Colors.YELLOW)} Anthropic API {color(msg, Colors.DIM)} ") + if r.status_code == 200: + return _ConnectivityResult( + "Anthropic API", + [(color("✓", Colors.GREEN), "Anthropic API", "")], + [], + ) + if r.status_code == 401: + return _ConnectivityResult( + "Anthropic API", + [(color("✗", Colors.RED), "Anthropic API", + color("(invalid API key)", Colors.DIM))], + [], + ) + return _ConnectivityResult( + "Anthropic API", + [(color("⚠", Colors.YELLOW), "Anthropic API", + color("(couldn't verify)", Colors.DIM))], + [], + ) 
except Exception as e: - print(f"\r {color('⚠', Colors.YELLOW)} Anthropic API {color(f'({e})', Colors.DIM)} ") + return _ConnectivityResult( + "Anthropic API", + [(color("⚠", Colors.YELLOW), "Anthropic API", + color(f"({e})", Colors.DIM))], + [], + ) + + def _probe_apikey_provider(pname, env_vars, default_url, base_env, + supports_health_check) -> _ConnectivityResult: + key = "" + for ev in env_vars: + key = os.getenv(ev, "") + if key: + break + if not key: + return _ConnectivityResult(pname, [], []) + label = pname.ljust(20) + if not supports_health_check: + return _ConnectivityResult( + pname, + [(color("✓", Colors.GREEN), label, + color("(key configured)", Colors.DIM))], + [], + ) + try: + import httpx + base = os.getenv(base_env, "") if base_env else "" + # Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com/coding/v1 + # (OpenAI-compat surface, which exposes /models for health check). + if not base and key.startswith("sk-kimi-"): + base = "https://api.kimi.com/coding/v1" + # Anthropic-compat endpoints (/anthropic, api.kimi.com/coding + # with no /v1) don't support /models. Rewrite to OpenAI-compat + # /v1 surface for health checks. 
+ if base and base.rstrip("/").endswith("/anthropic"): + from agent.auxiliary_client import _to_openai_base_url + base = _to_openai_base_url(base) + if base_url_host_matches(base, "api.kimi.com") and base.rstrip("/").endswith("/coding"): + base = base.rstrip("/") + "/v1" + url = (base.rstrip("/") + "/models") if base else default_url + headers = { + "Authorization": f"Bearer {key}", + "User-Agent": _HERMES_USER_AGENT, + } + if base_url_host_matches(base, "api.kimi.com"): + headers["User-Agent"] = "claude-code/0.1.0" + r = httpx.get(url, headers=headers, timeout=10) + if ( + pname == "Alibaba/DashScope" + and not base + and r.status_code == 401 + ): + r = httpx.get( + "https://dashscope.aliyuncs.com/compatible-mode/v1/models", + headers=headers, timeout=10, + ) + if r.status_code == 200: + return _ConnectivityResult( + pname, + [(color("✓", Colors.GREEN), label, "")], + [], + ) + if r.status_code == 401: + return _ConnectivityResult( + pname, + [(color("✗", Colors.RED), label, + color("(invalid API key)", Colors.DIM))], + [f"Check {env_vars[0]} in .env"], + ) + return _ConnectivityResult( + pname, + [(color("⚠", Colors.YELLOW), label, + color(f"(HTTP {r.status_code})", Colors.DIM))], + [], + ) + except Exception as e: + return _ConnectivityResult( + pname, + [(color("⚠", Colors.YELLOW), label, + color(f"({e})", Colors.DIM))], + [], + ) + + def _probe_bedrock() -> _ConnectivityResult: + try: + from agent.bedrock_adapter import ( + has_aws_credentials, + resolve_aws_auth_env_var, + resolve_bedrock_region, + ) + except ImportError: + return _ConnectivityResult("AWS Bedrock", [], []) + if not has_aws_credentials(): + return _ConnectivityResult("AWS Bedrock", [], []) + auth_var = resolve_aws_auth_env_var() + region = resolve_bedrock_region() + label = "AWS Bedrock".ljust(20) + try: + import boto3 + from botocore.config import Config as _BotoConfig + # Trim retries on the actual Bedrock API call so a transient + # failure doesn't pad the doctor run by 30+ seconds. 
+ cfg = _BotoConfig( + connect_timeout=5, + read_timeout=10, + retries={"max_attempts": 1}, + ) + client = boto3.client("bedrock", region_name=region, config=cfg) + resp = client.list_foundation_models() + n = len(resp.get("modelSummaries", [])) + return _ConnectivityResult( + "AWS Bedrock", + [(color("✓", Colors.GREEN), label, + color(f"({auth_var}, {region}, {n} models)", Colors.DIM))], + [], + ) + except ImportError: + return _ConnectivityResult( + "AWS Bedrock", + [(color("⚠", Colors.YELLOW), label, + color(f"(boto3 not installed — {sys.executable} -m pip install boto3)", + Colors.DIM))], + [f"Install boto3 for Bedrock: {sys.executable} -m pip install boto3"], + ) + except Exception as e: + err_name = type(e).__name__ + return _ConnectivityResult( + "AWS Bedrock", + [(color("⚠", Colors.YELLOW), label, + color(f"({err_name}: {e})", Colors.DIM))], + [f"AWS Bedrock: {err_name} — check IAM permissions for " + f"bedrock:ListFoundationModels"], + ) + + # Build the probe submission list in display order + _probes.append(("OpenRouter API", _probe_openrouter)) + _probes.append(("Anthropic API", _probe_anthropic)) - # -- API-key providers -- - # Tuple: (name, env_vars, default_url, base_env, supports_models_endpoint) - # If supports_models_endpoint is False, we skip the health check and just show "configured" - # Cached at module level after first build — profiles auto-extend it. 
global _APIKEY_PROVIDERS_CACHE if _APIKEY_PROVIDERS_CACHE is None: _APIKEY_PROVIDERS_CACHE = _build_apikey_providers_list() - _apikey_providers = _APIKEY_PROVIDERS_CACHE - for _pname, _env_vars, _default_url, _base_env, _supports_health_check in _apikey_providers: - _key = "" - for _ev in _env_vars: - _key = os.getenv(_ev, "") - if _key: - break - if _key: - _label = _pname.ljust(20) - # Some providers (like MiniMax) don't support /models endpoint - if not _supports_health_check: - print(f" {color('✓', Colors.GREEN)} {_label} {color('(key configured)', Colors.DIM)}") - continue - print(f" Checking {_pname} API...", end="", flush=True) - try: - import httpx - _base = os.getenv(_base_env, "") if _base_env else "" - # Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com/coding/v1 - # (OpenAI-compat surface, which exposes /models for health check). - if not _base and _key.startswith("sk-kimi-"): - _base = "https://api.kimi.com/coding/v1" - # Anthropic-compat endpoints (/anthropic, api.kimi.com/coding - # with no /v1) don't support /models. Rewrite to the OpenAI-compat - # /v1 surface for health checks. 
- if _base and _base.rstrip("/").endswith("/anthropic"): - from agent.auxiliary_client import _to_openai_base_url - _base = _to_openai_base_url(_base) - if base_url_host_matches(_base, "api.kimi.com") and _base.rstrip("/").endswith("/coding"): - _base = _base.rstrip("/") + "/v1" - _url = (_base.rstrip("/") + "/models") if _base else _default_url - _headers = { - "Authorization": f"Bearer {_key}", - "User-Agent": _HERMES_USER_AGENT, - } - if base_url_host_matches(_base, "api.kimi.com"): - _headers["User-Agent"] = "claude-code/0.1.0" - _resp = httpx.get( - _url, - headers=_headers, - timeout=10, - ) - if ( - _pname == "Alibaba/DashScope" - and not _base - and _resp.status_code == 401 - ): - _resp = httpx.get( - "https://dashscope.aliyuncs.com/compatible-mode/v1/models", - headers=_headers, - timeout=10, - ) - if _resp.status_code == 200: - print(f"\r {color('✓', Colors.GREEN)} {_label} ") - elif _resp.status_code == 401: - print(f"\r {color('✗', Colors.RED)} {_label} {color('(invalid API key)', Colors.DIM)} ") - issues.append(f"Check {_env_vars[0]} in .env") - else: - print(f"\r {color('⚠', Colors.YELLOW)} {_label} {color(f'(HTTP {_resp.status_code})', Colors.DIM)} ") - except Exception as _e: - print(f"\r {color('⚠', Colors.YELLOW)} {_label} {color(f'({_e})', Colors.DIM)} ") + for _entry in _APIKEY_PROVIDERS_CACHE: + _pname, _env_vars, _default_url, _base_env, _supports = _entry + # Capture loop vars by binding default args — without this, all closures + # would share the final iteration's values and every probe would hit + # the last provider's URL. + _probes.append((_pname, lambda p=_pname, e=_env_vars, u=_default_url, + b=_base_env, s=_supports: + _probe_apikey_provider(p, e, u, b, s))) - # -- AWS Bedrock -- - # Bedrock uses the AWS SDK credential chain, not API keys. + _probes.append(("AWS Bedrock", _probe_bedrock)) + + # Print a single status line so users see something happening, then + # fan out. It is blanked with ``\r`` after every probe has finished. 
+ print(f" {color(f'Running {len(_probes)} connectivity checks in parallel…', Colors.DIM)}", + end="", flush=True) + + # Disable boto3's EC2 instance-metadata-service probe for the duration + # of the parallel block. boto's default credential chain tries + # 169.254.169.254 with a multi-second timeout when we're not on EC2, + # which dominated the section's wall time before this fix + # (~2s on a developer laptop, even with the rest parallelized). + # Set on the parent thread before submitting work so the env-var + # mutation never races with another worker. has_aws_credentials() in + # the bedrock probe already gates on real env-var creds, so IMDS is + # never the legitimate source for `hermes doctor`. + _imds_prev = os.environ.get("AWS_EC2_METADATA_DISABLED") + os.environ["AWS_EC2_METADATA_DISABLED"] = "true" try: - from agent.bedrock_adapter import has_aws_credentials, resolve_aws_auth_env_var, resolve_bedrock_region - if has_aws_credentials(): - _auth_var = resolve_aws_auth_env_var() - _region = resolve_bedrock_region() - _label = "AWS Bedrock".ljust(20) - print(f" Checking AWS Bedrock...", end="", flush=True) - try: - import boto3 - _br_client = boto3.client("bedrock", region_name=_region) - _br_resp = _br_client.list_foundation_models() - _model_count = len(_br_resp.get("modelSummaries", [])) - print(f"\r {color('✓', Colors.GREEN)} {_label} {color(f'({_auth_var}, {_region}, {_model_count} models)', Colors.DIM)} ") - except ImportError: - print(f"\r {color('⚠', Colors.YELLOW)} {_label} {color(f'(boto3 not installed — {sys.executable} -m pip install boto3)', Colors.DIM)} ") - issues.append(f"Install boto3 for Bedrock: {sys.executable} -m pip install boto3") - except Exception as _e: - _err_name = type(_e).__name__ - print(f"\r {color('⚠', Colors.YELLOW)} {_label} {color(f'({_err_name}: {_e})', Colors.DIM)} ") - issues.append(f"AWS Bedrock: {_err_name} — check IAM permissions for bedrock:ListFoundationModels") - except ImportError: - pass # bedrock_adapter not 
available — skip silently + # 8 workers is plenty — each probe is a single HTTP call plus a TLS + # handshake. More than that wastes thread-startup cost and risks + # noisy output if anything ever printed from inside a worker. + with _futures.ThreadPoolExecutor(max_workers=8, + thread_name_prefix="doctor-probe") as _ex: + _futures_in_order = [_ex.submit(_fn) for _, _fn in _probes] + _results = [_f.result() for _f in _futures_in_order] + finally: + if _imds_prev is None: + os.environ.pop("AWS_EC2_METADATA_DISABLED", None) + else: + os.environ["AWS_EC2_METADATA_DISABLED"] = _imds_prev + + # Clear the "Running …" line and print all results in submission order. + print("\r" + " " * 70 + "\r", end="") + for _r in _results: + for _glyph, _label, _detail in _r.lines: + if _detail: + print(f" {_glyph} {_label} {_detail}") + else: + print(f" {_glyph} {_label}") + for _issue in _r.issues: + issues.append(_issue) # ========================================================================= # Check: Submodules