From 9eccb11edf692c2f701fda590d7a5e1dca4f7c09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Angelantoni?= Date: Wed, 15 Apr 2026 12:30:35 -0700 Subject: [PATCH 1/2] fix(agent): notify gateway users when all provider credentials are auth-exhausted MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When all credentials for a provider are exhausted due to 401/403 failures, emit a plain-language _emit_status() notification so gateway users (Telegram, Discord, etc.) know their primary AI has become unavailable and what to do. Same-provider key rotation remains silent — the message only fires when rotation itself fails and Hermes is forced to fall back. This is distinct from the cooldown duration change in PR #10058 (which was closed). The notification half of that fix stands on its own: the configured fallback_model path already calls _emit_status() on provider switch, so this makes the credential pool exhaustion path consistent with that behavior. Closes #10476 --- run_agent.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/run_agent.py b/run_agent.py index d7d1249be..c45c3cfaa 100644 --- a/run_agent.py +++ b/run_agent.py @@ -4799,6 +4799,18 @@ class AIAgent: ) self._swap_credential(next_entry) return True, False + # All credentials for this provider are exhausted due to an auth + # failure. Emit a plain-language notification so gateway users + # (Telegram, Discord, etc.) know their primary AI is unavailable + # and what to do about it. Same-provider key rotation (above) + # remains silent — the message only fires when rotation fails. + _provider_label = getattr(self, "provider", "unknown") + self._emit_status( + f"⚠️ Primary AI ({_provider_label}) is unavailable — the API key " + f"may be invalid or expired (HTTP {rotate_status}). Switched to " + f"fallback if one is configured. To restore: check your API key " + f"and run `hermes auth reset {_provider_label}`." + ) return False, has_retried_429 From 116f8b6c29fc16565342a4eb4614ca7e73dc2a74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Angelantoni?= Date: Wed, 15 Apr 2026 12:33:06 -0700 Subject: [PATCH 2/2] test(agent): verify _emit_status fires on auth exhaustion, silent on rotation Two tests for TestAuthExhaustionNotification: - emits notification when all 401 credentials exhausted (rotate returns None) - stays silent when rotation to a next credential succeeds Matches the pattern of TestPoolRotationCycle in the same file. --- tests/agent/test_credential_pool_routing.py | 68 +++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/tests/agent/test_credential_pool_routing.py b/tests/agent/test_credential_pool_routing.py index 38f5c6dfd..544a5bd9c 100644 --- a/tests/agent/test_credential_pool_routing.py +++ b/tests/agent/test_credential_pool_routing.py @@ -348,3 +348,71 @@ class TestPoolRotationCycle: ) assert recovered is False assert has_retried is False + + +# --------------------------------------------------------------------------- +# 7. Auth exhaustion notification via _emit_status +# --------------------------------------------------------------------------- + +class TestAuthExhaustionNotification: + """Verify _emit_status is called when all credentials are 401-exhausted. + + Same-provider key rotation should remain silent — the notification only + fires when mark_exhausted_and_rotate() returns None (no credentials left). + """ + + def _make_agent(self, *, next_entry=None): + """Minimal AIAgent with a credential pool that returns next_entry on rotate.""" + from run_agent import AIAgent + from agent.error_classifier import FailoverReason + + with patch.object(AIAgent, "__init__", lambda self, **kw: None): + agent = AIAgent() + + pool = MagicMock() + pool.has_credentials.return_value = True + pool.try_refresh_current.return_value = None # refresh always fails + pool.mark_exhausted_and_rotate.return_value = next_entry + agent._credential_pool = pool + agent._swap_credential = MagicMock() + agent.log_prefix = "" + agent.provider = "kimi-coding" + agent._emit_status = MagicMock() + + return agent, pool + + def test_emits_notification_when_all_credentials_exhausted(self): + """When pool is fully exhausted on 401, _emit_status must fire.""" + from agent.error_classifier import FailoverReason + + agent, _ = self._make_agent(next_entry=None) # no credentials left + + recovered, _ = agent._recover_with_credential_pool( + status_code=401, + has_retried_429=False, + classified_reason=FailoverReason.auth, + ) + + assert recovered is False + agent._emit_status.assert_called_once() + msg = agent._emit_status.call_args[0][0] + assert "kimi-coding" in msg + assert "401" in msg + assert "hermes auth reset" in msg + + def test_silent_when_rotation_to_next_credential_succeeds(self): + """When a next credential is available, rotation is silent — no _emit_status.""" + from agent.error_classifier import FailoverReason + + next_entry = MagicMock() + next_entry.id = "cred-2" + agent, _ = self._make_agent(next_entry=next_entry) + + recovered, _ = agent._recover_with_credential_pool( + status_code=401, + has_retried_429=False, + classified_reason=FailoverReason.auth, + ) + + assert recovered is True + agent._emit_status.assert_not_called()