diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 27c67c10a3..2887447d1e 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -1047,6 +1047,40 @@ def _is_payment_error(exc: Exception) -> bool:
     return False
 
 
+def _is_connection_error(exc: Exception) -> bool:
+    """Detect connection/network errors that warrant provider fallback.
+
+    Returns True for errors indicating the provider endpoint is unreachable
+    (DNS failure, connection refused, TLS errors, timeouts). These are
+    distinct from API errors (4xx/5xx) which indicate the provider IS
+    reachable but returned an error.
+
+    Detection is heuristic: the OpenAI SDK connection classes first, then
+    the exception's class name, then well-known OS/resolver message text.
+    """
+    from openai import APIConnectionError, APITimeoutError
+
+    if isinstance(exc, (APIConnectionError, APITimeoutError)):
+        return True
+    # urllib3 / httpx / httpcore connection errors. Match "Connect" rather
+    # than "Connection" so httpx/httpcore ``ConnectError`` is caught too.
+    err_type = type(exc).__name__
+    if any(kw in err_type for kw in ("Connect", "Timeout", "DNS", "SSL")):
+        return True
+    # Resolver failures are worded differently per platform (glibc, macOS,
+    # Windows), so list each variant instead of only the Linux message.
+    err_lower = str(exc).lower()
+    if any(kw in err_lower for kw in (
+        "connection refused", "connection reset", "timed out",
+        "no route to host", "network is unreachable",
+        "name or service not known",
+        "temporary failure in name resolution",
+        "nodename nor servname", "getaddrinfo failed",
+    )):
+        return True
+    return False
+
+
 def _try_payment_fallback(
     failed_provider: str,
     task: str = None,
@@ -2093,7 +2127,18 @@ def call_llm(
         # try alternative providers instead of giving up. This handles the
         # common case where a user runs out of OpenRouter credits but has
         # Codex OAuth or another provider available.
-        if _is_payment_error(first_err):
+        #
+        # ── Connection error fallback ────────────────────────────────
+        # When a provider endpoint is unreachable (DNS failure, connection
+        # refused, timeout), try alternative providers. This handles stale
+        # Codex/OAuth tokens that authenticate but whose endpoint is down,
+        # and providers the user never configured that got picked up by
+        # the auto-detection chain.
+ should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err) + if should_fallback: + reason = "payment error" if _is_payment_error(first_err) else "connection error" + logger.info("Auxiliary %s: %s on %s (%s), trying fallback", + task or "call", reason, resolved_provider, first_err) fb_client, fb_model, fb_label = _try_payment_fallback( resolved_provider, task) if fb_client is not None: diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index b7360fdd32..6689e5fb7d 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -3017,12 +3017,15 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: _save_provider_state(auth_store, "nous", auth_state) saved_to = _save_auth_store(auth_store) - config_path = _update_config_for_provider("nous", inference_base_url) print() print("Login successful!") print(f" Auth state: {saved_to}") - print(f" Config updated: {config_path} (model.provider=nous)") + # Resolve model BEFORE writing provider to config.yaml so we never + # leave the config in a half-updated state (provider=nous but model + # still set to the previous provider's model, e.g. opus from + # OpenRouter). The auth.json active_provider was already set above. + selected_model = None try: runtime_key = auth_state.get("agent_key") or auth_state.get("access_token") if not isinstance(runtime_key, str) or not runtime_key: @@ -3056,9 +3059,6 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: unavailable_models=unavailable_models, portal_url=_portal, ) - if selected_model: - _save_model_choice(selected_model) - print(f"Default model set to: {selected_model}") elif unavailable_models: _url = (_portal or DEFAULT_NOUS_PORTAL_URL).rstrip("/") print("No free models currently available.") @@ -3070,6 +3070,15 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: print() print(f"Login succeeded, but could not fetch available models. Reason: {message}") + # Write provider + model atomically so config is never mismatched. 
+ config_path = _update_config_for_provider( + "nous", inference_base_url, default_model=selected_model, + ) + if selected_model: + _save_model_choice(selected_model) + print(f"Default model set to: {selected_model}") + print(f" Config updated: {config_path} (model.provider=nous)") + except KeyboardInterrupt: print("\nLogin cancelled.") raise SystemExit(130)