fix(delegate): resolve custom-endpoint subagent pools by endpoint identity (#41730)

Subagents delegated to a custom endpoint were misrouted when the parent
ran on a different custom endpoint. Both runtimes collapse to
provider="custom", so _resolve_child_credential_pool() treated them as
interchangeable and handed the child the parent's pool. Leasing from it
then overwrote the child's delegated base_url with the parent's endpoint
via _swap_credential() — the child sent the delegated model name to the
wrong endpoint.

Custom runtimes now resolve by endpoint identity (the custom:<name> pool
key derived from base_url). The parent pool is reused only when both
parent and child resolve to the same custom endpoint; unregistered raw
endpoints return None so the child keeps its fixed delegated credential.
Non-custom provider paths are unchanged.

Fixes #7833.
This commit is contained in:
Teknium 2026-06-07 22:05:14 -07:00 committed by GitHub
parent bddc5fd087
commit 48ae8029aa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 124 additions and 3 deletions

View file

@ -374,7 +374,7 @@ def _iter_custom_providers(config: Optional[dict] = None):
yield _normalize_custom_pool_name(name), entry
def get_custom_provider_pool_key(base_url: str, provider_name: Optional[str] = None) -> Optional[str]:
def get_custom_provider_pool_key(base_url: Optional[str], provider_name: Optional[str] = None) -> Optional[str]:
"""Look up the custom_providers list in config.yaml and return 'custom:<name>' for a matching base_url.
When provider_name is given, prefer matching by name first (solving the case where

View file

@ -1518,6 +1518,73 @@ class TestChildCredentialPoolResolution(unittest.TestCase):
self.assertIsNone(result)
# --- Custom-endpoint identity resolution (issue #7833) ---
def test_custom_different_endpoint_does_not_inherit_parent_pool(self):
    """Parent on custom endpoint A, child delegated to custom endpoint B.

    Both runtimes report provider='custom', yet the resolver must load
    endpoint B's own pool instead of handing over the parent's pool.
    """
    endpoint_a = "https://endpoint-a.example.com/v1"
    endpoint_b = "https://endpoint-b.example.com/v1"

    parent_agent = _make_mock_parent()
    parent_agent.provider = "custom"
    parent_agent.base_url = endpoint_a
    parent_agent._credential_pool = MagicMock(name="parent_custom_a_pool")

    endpoint_b_pool = MagicMock(name="endpoint_b_pool")
    endpoint_b_pool.has_credentials.return_value = True

    # Stand-in for the config lookup: each known URL maps to its own pool
    # key, anything else resolves to None.
    pool_keys = {
        endpoint_a: "custom:endpoint-a",
        endpoint_b: "custom:endpoint-b",
    }

    def lookup_key(base_url, provider_name=None):
        return pool_keys.get(base_url)

    key_patch = patch(
        "agent.credential_pool.get_custom_provider_pool_key",
        side_effect=lookup_key,
    )
    load_patch = patch(
        "agent.credential_pool.load_pool",
        return_value=endpoint_b_pool,
    )
    with key_patch:
        with load_patch as load_mock:
            resolved = _resolve_child_credential_pool(
                "custom", parent_agent, endpoint_b
            )

    # The pool was loaded under the child's endpoint key, and the result is
    # that pool rather than the one attached to the parent.
    load_mock.assert_called_once_with("custom:endpoint-b")
    self.assertIs(resolved, endpoint_b_pool)
    self.assertIsNot(resolved, parent_agent._credential_pool)
def test_custom_same_endpoint_shares_parent_pool(self):
    """Parent and child on one and the same custom endpoint.

    When both base URLs resolve to the same pool key, the resolver hands
    back the parent's pool object itself, so both agents work from a
    single shared pool.
    """
    shared_endpoint = "https://endpoint-a.example.com/v1"

    parent_agent = _make_mock_parent()
    parent_agent.provider = "custom"
    parent_agent.base_url = shared_endpoint
    parent_agent._credential_pool = MagicMock(name="parent_custom_a_pool")

    # Every lookup yields the same key, i.e. parent and child are the same
    # endpoint as far as the resolver can tell.
    key_patch = patch(
        "agent.credential_pool.get_custom_provider_pool_key",
        return_value="custom:endpoint-a",
    )
    with key_patch:
        resolved = _resolve_child_credential_pool(
            "custom", parent_agent, shared_endpoint
        )

    self.assertIs(resolved, parent_agent._credential_pool)
def test_custom_unregistered_endpoint_returns_none(self):
    """Child delegated to a raw base_url that has no custom_providers entry.

    With no pool key for the child endpoint, the resolver must return None
    instead of the parent's pool, leaving the child on its fixed delegated
    credential.
    """
    parent_agent = _make_mock_parent()
    parent_agent.provider = "custom"
    parent_agent.base_url = "https://endpoint-a.example.com/v1"
    parent_agent._credential_pool = MagicMock(name="parent_custom_a_pool")

    # The key lookup finds nothing, which is how an unregistered endpoint
    # appears to the resolver.
    key_patch = patch(
        "agent.credential_pool.get_custom_provider_pool_key",
        return_value=None,
    )
    with key_patch:
        resolved = _resolve_child_credential_pool(
            "custom",
            parent_agent,
            "https://raw-unregistered.example.com/v1",
        )

    self.assertIsNone(resolved)
def test_build_child_agent_assigns_parent_pool_when_shared(self):
parent = _make_mock_parent()
mock_pool = MagicMock()

View file

@ -1184,7 +1184,9 @@ def _build_child_agent(
# Share a credential pool with the child when possible so subagents can
# rotate credentials on rate limits instead of getting pinned to one key.
child_pool = _resolve_child_credential_pool(effective_provider, parent_agent)
child_pool = _resolve_child_credential_pool(
effective_provider, parent_agent, effective_base_url
)
if child_pool is not None:
child._credential_pool = child_pool
@ -2368,7 +2370,11 @@ def delegate_task(
)
def _resolve_child_credential_pool(effective_provider: Optional[str], parent_agent):
def _resolve_child_credential_pool(
effective_provider: Optional[str],
parent_agent,
effective_base_url: Optional[str] = None,
):
"""Resolve a credential pool for the child agent.
Rules:
@ -2377,12 +2383,60 @@ def _resolve_child_credential_pool(effective_provider: Optional[str], parent_age
2. Different provider -> try to load that provider's own pool.
3. No pool available -> return None and let the child keep the inherited
fixed credential behavior.
Custom endpoints are a special case: every direct ``delegation.base_url``
runtime collapses to ``provider="custom"``, so bare provider equality would
treat two *different* custom endpoints as interchangeable and let the child
inherit the parent's pool. Leasing from that pool then overwrites the
child's delegated ``base_url`` with the parent's endpoint (issue #7833).
We therefore resolve custom runtimes by endpoint identity (the
``custom:<name>`` pool key derived from the base_url) and only share the
parent's pool when both resolve to the *same* custom endpoint.
"""
if not effective_provider:
return getattr(parent_agent, "_credential_pool", None)
parent_provider = getattr(parent_agent, "provider", None) or ""
parent_pool = getattr(parent_agent, "_credential_pool", None)
# Custom endpoints: distinguish by endpoint identity, not the bare "custom"
# provider string. Two custom runtimes are only interchangeable when they
# resolve to the same custom:<name> pool key.
if effective_provider == "custom":
try:
from agent.credential_pool import get_custom_provider_pool_key, load_pool
child_key = get_custom_provider_pool_key(effective_base_url)
if child_key is None:
# Unregistered endpoint (raw delegation.base_url with no
# matching custom_providers entry) -> no shared pool exists.
# Keep the child's fixed delegated credential rather than
# risk inheriting the parent's custom endpoint.
return None
# Reuse the parent's pool only when it is the same custom endpoint.
parent_key = get_custom_provider_pool_key(
getattr(parent_agent, "base_url", None)
)
if (
parent_pool is not None
and parent_provider == "custom"
and parent_key is not None
and parent_key == child_key
):
return parent_pool
pool = load_pool(child_key)
if pool is not None and pool.has_credentials():
return pool
except Exception as exc:
logger.debug(
"Could not resolve custom credential pool for child endpoint '%s': %s",
effective_base_url,
exc,
)
return None
if parent_pool is not None and effective_provider == parent_provider:
return parent_pool