mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
fix(vision): route auxiliary.vision.provider=openai to api.openai.com, skip text-only main (#31452)
* fix(vision): route auxiliary.vision.provider=openai to api.openai.com, skip text-only main for vision Fixes #31179. Three coupled fixes so a configured aux vision backend actually serves vision tasks instead of silently routing images to the user's main provider: 1. agent/auxiliary_client.py: `auxiliary.<task>.provider: openai` resolves to `custom` + `https://api.openai.com/v1`. "openai" was not in PROVIDER_REGISTRY (we have `openai-codex` for OAuth and `custom` for manual base_url), so the obvious config name silently failed to build a client. User-supplied base_url is still preserved; only the provider name normalises to `custom` so resolution doesn't hit the PROVIDER_REGISTRY-only path. 2. agent/auxiliary_client.py: the vision auto-detect chain now skips the user's main provider when models.dev reports `supports_vision=False`. Without this guard, a misconfigured aux provider would fall back to `auto`, which happily returned the main-provider client. The caller would then send image content to e.g. api.deepseek.com with model `gpt-4o-mini` and get a cryptic `unknown variant 'image_url', expected 'text'` from the provider's parser. 3. tools/vision_tools.py + tools/browser_tool.py: `check_vision_requirements` now mirrors the runtime fallback chain (explicit provider, then auto), so `vision_analyze` shows up whenever vision is actually serviceable. `browser_vision` gets a new `check_browser_vision_requirements` check_fn that AND-gates browser + vision availability, so it doesn't get advertised to the model when the call would fail at runtime. Reproduction (config from the bug report): model.provider: deepseek model.default: deepseek-v4-pro auxiliary.vision.provider: openai auxiliary.vision.model: gpt-4o-mini Before: resolve_vision_provider_client() returns None for the explicit provider, fallback auto returns the deepseek client with model='gpt-4o-mini', image hits api.deepseek.com → 'unknown variant image_url'. vision_analyze hidden from tool list; browser_vision exposed but fails at call time. After: resolves to custom + api.openai.com/v1 with model gpt-4o-mini. vision_analyze and browser_vision both gate correctly on capability. Tests: tests/agent/test_vision_routing_31179.py covers all three fixes (12 cases including the user's exact scenario, base_url preservation, text-only-main skip, capability-unknown permissive fallback, and tool gating parity). Existing 382 tests across auxiliary/vision/image_routing suites still pass. * test(vision): use exact hostname check to silence CodeQL substring-sanitization alert * fix(auxiliary): drop model name from vision-skip debug log to silence CodeQL The new `logger.debug(...)` added in the previous commit interpolated both `main_provider` and `vision_model` (a public model slug \u2014 not sensitive). CodeQL's `py/clear-text-logging-sensitive-data` heuristic re-flagged it twice because the rule mis-detects multi-value interpolations near tainted-via-config provider strings. Drop the model from the log args (provider alone is enough to diagnose the skip; the same sibling branch a few lines up already logs provider only). Behavior unchanged; CodeQL false positive cleared.
This commit is contained in:
parent
d9ec90585c
commit
3d66787a04
4 changed files with 417 additions and 3 deletions
|
|
@ -3730,6 +3730,37 @@ _VISION_AUTO_PROVIDER_ORDER = (
|
|||
)
|
||||
|
||||
|
||||
def _main_model_supports_vision(provider: str, model: Optional[str]) -> bool:
|
||||
"""Return True when ``provider``/``model`` is known to accept image input.
|
||||
|
||||
Used by the vision auto-detect chain to skip the user's main provider
|
||||
when it's known to be text-only (e.g. DeepSeek, gpt-oss without vision).
|
||||
Without this guard, ``resolve_vision_provider_client(provider="auto")``
|
||||
would happily return the main-provider client and any subsequent image
|
||||
payload would surface as a cryptic provider-side error
|
||||
(``unknown variant `image_url`, expected `text```, #31179).
|
||||
|
||||
Returns True when capability lookup is unknown — preserves the historical
|
||||
behaviour of attempting the call, so providers we haven't catalogued yet
|
||||
don't silently regress to text-only.
|
||||
"""
|
||||
try:
|
||||
from agent.image_routing import _lookup_supports_vision
|
||||
from hermes_cli.config import load_config
|
||||
except ImportError:
|
||||
return True
|
||||
try:
|
||||
supports = _lookup_supports_vision(provider, model, load_config())
|
||||
except Exception: # pragma: no cover - defensive
|
||||
return True
|
||||
if supports is None:
|
||||
# No capability data — keep current behaviour and let the call attempt
|
||||
# happen rather than silently skipping. This avoids false-positive
|
||||
# skips for new/custom providers.
|
||||
return True
|
||||
return bool(supports)
|
||||
|
||||
|
||||
def _normalize_vision_provider(provider: Optional[str]) -> str:
|
||||
return _normalize_aux_provider(provider)
|
||||
|
||||
|
|
@ -3870,6 +3901,23 @@ def resolve_vision_provider_client(
|
|||
"vision support) — falling through to aggregator chain",
|
||||
main_provider,
|
||||
)
|
||||
elif not _main_model_supports_vision(main_provider, vision_model):
|
||||
# The main model is known to be text-only (e.g. DeepSeek V4,
|
||||
# gpt-oss-120b without vision). Building a client and sending
|
||||
# an image would produce a cryptic provider-side error like
|
||||
# ``unknown variant `image_url`, expected `text``` (#31179).
|
||||
# Fall through to the aggregator chain instead.
|
||||
#
|
||||
# Only log the provider name (not the model) — mirrors the
|
||||
# sibling _PROVIDERS_WITHOUT_VISION branch above, and avoids
|
||||
# CodeQL py/clear-text-logging-sensitive-data heuristic false
|
||||
# positives on multi-value interpolations.
|
||||
logger.debug(
|
||||
"Vision auto-detect: skipping main provider %s "
|
||||
"(reports no vision capability) — falling through to "
|
||||
"aggregator chain",
|
||||
main_provider,
|
||||
)
|
||||
else:
|
||||
rpc_client, rpc_model = resolve_provider_client(
|
||||
main_provider, vision_model,
|
||||
|
|
@ -4281,6 +4329,23 @@ def _get_cached_client(
|
|||
return client, model or default_model
|
||||
|
||||
|
||||
# Aliases that target direct REST APIs not modeled as first-class providers
|
||||
# in PROVIDER_REGISTRY. Used for ``auxiliary.<task>.provider`` so users can
|
||||
# write the obvious name and have it resolve to a working ``custom`` endpoint
|
||||
# without needing to know our internal provider IDs.
|
||||
#
|
||||
# Why these specifically: PROVIDER_REGISTRY has ``openai-codex`` (OAuth) and
|
||||
# ``custom`` (manual base_url + OPENAI_API_KEY) but no plain ``openai`` for
|
||||
# direct API-key access. Users predictably type ``provider: openai`` and
|
||||
# expect it to use OPENAI_API_KEY against api.openai.com. Previously this
|
||||
# silently fell back to the user's main provider, sending OpenAI model names
|
||||
# to e.g. DeepSeek and producing cryptic ``unknown variant 'image_url'``
|
||||
# errors (issue #31179).
|
||||
_AUX_DIRECT_API_BASE_URLS: Dict[str, str] = {
|
||||
"openai": "https://api.openai.com/v1",
|
||||
}
|
||||
|
||||
|
||||
def _resolve_task_provider_model(
|
||||
task: str = None,
|
||||
provider: str = None,
|
||||
|
|
@ -4317,6 +4382,25 @@ def _resolve_task_provider_model(
|
|||
resolved_model = model or cfg_model
|
||||
resolved_api_mode = cfg_api_mode
|
||||
|
||||
# Convenience aliases for direct API-key endpoints that aren't first-class
|
||||
# providers (e.g. ``provider: openai`` → custom + api.openai.com/v1).
|
||||
# Applied to both explicit args and config-derived values. When the user
|
||||
# has already supplied a base_url we keep their endpoint but still rewrite
|
||||
# the provider to ``custom`` so resolution doesn't hit the
|
||||
# PROVIDER_REGISTRY-only path (which has no ``openai`` entry).
|
||||
def _expand_direct_api_alias(prov: Optional[str], existing_base: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
|
||||
if not prov:
|
||||
return prov, existing_base
|
||||
target_base = _AUX_DIRECT_API_BASE_URLS.get(prov.strip().lower())
|
||||
if target_base is None:
|
||||
return prov, existing_base
|
||||
return "custom", existing_base or target_base
|
||||
|
||||
if provider:
|
||||
provider, base_url = _expand_direct_api_alias(provider, base_url)
|
||||
if cfg_provider:
|
||||
cfg_provider, cfg_base_url = _expand_direct_api_alias(cfg_provider, cfg_base_url)
|
||||
|
||||
if base_url:
|
||||
return "custom", resolved_model, base_url, api_key, resolved_api_mode
|
||||
if provider:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue