diff --git a/agent/agent_init.py b/agent/agent_init.py index 4f2e3f138f7..4210b515f65 100644 --- a/agent/agent_init.py +++ b/agent/agent_init.py @@ -1227,6 +1227,12 @@ def init_agent( # targets. agent._task_completion_guidance = bool(_agent_section.get("task_completion_guidance", True)) + # Universal parallel-tool-call guidance toggle. Default True. Separate + # flag from task_completion_guidance because a user may want one but not + # the other. Steers the model to batch independent tool calls into a + # single turn; the runtime already executes such batches concurrently. + agent._parallel_tool_call_guidance = bool(_agent_section.get("parallel_tool_call_guidance", True)) + # Local Python toolchain probe toggle. Default True. When False, # the probe is skipped entirely (no subprocess calls, no system-prompt # line). Useful for users on exotic setups where the probe heuristics diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index bbae3c9a773..b8e60722168 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -305,6 +305,45 @@ TASK_COMPLETION_GUIDANCE = ( "is always better than inventing a result." ) +# Universal parallel-tool-call guidance — applied to ALL models. +# +# Why this matters for cost: every assistant turn resends the entire +# accumulated conversation (and, on cache-friendly providers, re-reads the +# cached prefix and pays for the newly-appended turn). A model that issues +# one tool call per turn multiplies the number of round-trips — and therefore +# the resent context — for any task that needs several independent reads, +# searches, or safe lookups. Batching independent calls into a single +# assistant response collapses N turns into one, cutting both latency and the +# resent-context cost that compounds over a long conversation. +# +# The hermes-agent runtime already executes a batch of tool calls +# concurrently when they are independent (read-only tools always; path-scoped +# file ops when their targets don't overlap — see +# run_agent._execute_tool_calls / tool_dispatch_helpers). The missing piece +# was telling the *model* to emit those calls together in the first place; +# nothing in the open-source system prompt encouraged batching. This block +# closes that gap. +# +# Short on purpose — shipped in the cached system prompt to every user, every +# session. Token cost is paid once at install and amortised across all +# sessions via prefix caching. Keep it tight. +# +# Ported from cline/cline#11514 ("encourage parallel tool calls"), adapted +# from Cline's TypeScript tool-surface guidance to hermes-agent's Python +# prompt-assembly architecture. +PARALLEL_TOOL_CALL_GUIDANCE = ( + "# Parallel tool calls\n" + "When you need several pieces of information that don't depend on each " + "other, request them together in a single response instead of one tool " + "call per turn. Independent reads, searches, web fetches, and read-only " + "commands should be batched into the same assistant turn — the runtime " + "executes independent calls concurrently, and batching avoids resending " + "the whole conversation on every extra round-trip.\n" + "Only serialize calls when a later call genuinely depends on an earlier " + "call's result (e.g. you must read a file before you can patch it). When " + "in doubt and the calls are independent, batch them." +) + # OpenAI GPT/Codex-specific execution guidance. Addresses known failure modes # where GPT models abandon work on partial results, skip prerequisite lookups, # hallucinate instead of using tools, and declare "done" without verification. diff --git a/agent/system_prompt.py b/agent/system_prompt.py index b3f39123fd5..281f01399b4 100644 --- a/agent/system_prompt.py +++ b/agent/system_prompt.py @@ -33,6 +33,7 @@ from agent.prompt_builder import ( KANBAN_GUIDANCE, MEMORY_GUIDANCE, OPENAI_MODEL_EXECUTION_GUIDANCE, + PARALLEL_TOOL_CALL_GUIDANCE, PLATFORM_HINTS, SESSION_SEARCH_GUIDANCE, SKILLS_GUIDANCE, @@ -123,6 +124,17 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None) if getattr(agent, "_task_completion_guidance", True) and agent.valid_tool_names: stable_parts.append(TASK_COMPLETION_GUIDANCE) + # Universal parallel-tool-call guidance. Tells the model to batch + # independent tool calls into one assistant turn rather than emitting one + # call per turn — the runtime already runs independent calls concurrently + # (read-only tools always; non-overlapping path-scoped file ops), so the + # only thing missing was steering the model to produce the batch. Cuts + # round-trips and the resent-context cost that compounds over a long + # conversation. Gated by config.yaml ``agent.parallel_tool_call_guidance`` + # (default True) and only injected when tools are actually loaded. + if getattr(agent, "_parallel_tool_call_guidance", True) and agent.valid_tool_names: + stable_parts.append(PARALLEL_TOOL_CALL_GUIDANCE) + # Tool-aware behavioral guidance: only inject when the tools are loaded tool_guidance = [] if "memory" in agent.valid_tool_names: diff --git a/hermes_cli/config.py b/hermes_cli/config.py index a9e8248e946..6db1130b5cd 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -925,6 +925,15 @@ DEFAULT_CONFIG = { # plausible-looking output when a real path is blocked. Costs ~80 # tokens in the cached system prompt. Set False to disable globally. "task_completion_guidance": True, + # Universal parallel-tool-call guidance — short prompt block applied to + # all models that tells the model to batch independent tool calls + # (reads, searches, web fetches, read-only commands) into one turn + # instead of one call per turn. The runtime already runs independent + # calls concurrently, so this just steers the model to produce the + # batch — cutting round-trips and the resent-context cost that + # compounds over a long conversation. Costs ~70 tokens in the cached + # system prompt. Set False to disable globally. + "parallel_tool_call_guidance": True, # Local-environment toolchain probe — surfaces Python/pip/uv/PEP-668 # state in the system prompt when something non-default is detected # (e.g. python3 has no pip module, pip→python version mismatch, PEP diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index 909cbe07a08..78b92dcbad9 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -713,6 +713,69 @@ def find_custom_provider_identity(base_url: str) -> Optional[str]: return None +def canonical_custom_identity( + *, + base_url: Optional[str] = None, + config_provider: Optional[str] = None, +) -> Optional[str]: + """Recover a routable ``custom:`` identity for a bare custom provider. + + The bare string ``"custom"`` is the *resolved billing class* shared by + every named ``providers:`` / ``custom_providers:`` entry — it is NOT a + routable provider identity (``resolve_runtime_provider("custom")`` falls + through to the OpenRouter default URL with no api_key, which surfaces to + the user as "No LLM provider configured"). + + Any code path that persists or restores a session's provider override + must run the resolved provider through this helper so a bare ``"custom"`` + is upgraded back to its durable ``custom:`` menu key. Two recovery + sources, in priority order: + + 1. ``base_url`` — reverse-lookup the entry that owns the endpoint URL + (the one fact that always survives the persistence round-trip when a + URL was recorded). + 2. ``config_provider`` — the active ``config.model.provider`` (or its + ``provider``/``HERMES_INFERENCE_PROVIDER`` equivalent). When the agent + was built without a base_url on the override (the recurring + Desktop/TUI regression vector), the configured provider is the only + durable identity left, so fall back to it when it names a real entry. + + Returns ``custom:`` when a routable identity is recovered, else + ``None`` (caller keeps whatever it had — bare ``"custom"`` only as a last + resort, e.g. a genuine ad-hoc endpoint with no config entry). + """ + # 1. Reverse-lookup by endpoint URL. + if base_url: + identity = find_custom_provider_identity(base_url) + if identity: + return identity + + # 2. Fall back to the configured provider when it names a real entry. + candidate = str(config_provider or "").strip() + if not candidate: + try: + candidate = str(_get_model_config().get("provider") or "").strip() + except Exception: + candidate = "" + if not candidate: + candidate = os.environ.get("HERMES_INFERENCE_PROVIDER", "").strip() + + candidate_norm = _normalize_custom_provider_name(candidate) + # A bare/non-routable candidate cannot heal a bare custom override. + if not candidate_norm or candidate_norm in {"custom", "auto", "openrouter"}: + return None + # Only return it when it actually resolves to a configured custom entry, + # so we never invent a `custom:` that resolution can't honor. + try: + if _get_named_custom_provider(candidate) is not None: + if candidate_norm.startswith("custom:"): + return candidate_norm + return f"custom:{candidate_norm}" + except Exception: + pass + return None + + def _normalize_base_url_for_match(value) -> str: return str(value or "").strip().rstrip("/").lower() diff --git a/tests/agent/test_prompt_builder.py b/tests/agent/test_prompt_builder.py index 4eb2f86e5a2..e98c26e319f 100644 --- a/tests/agent/test_prompt_builder.py +++ b/tests/agent/test_prompt_builder.py @@ -27,6 +27,7 @@ from agent.prompt_builder import ( TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, OPENAI_MODEL_EXECUTION_GUIDANCE, + PARALLEL_TOOL_CALL_GUIDANCE, MEMORY_GUIDANCE, SESSION_SEARCH_GUIDANCE, PLATFORM_HINTS, @@ -1497,6 +1498,43 @@ class TestOpenAIModelExecutionGuidance: assert len(OPENAI_MODEL_EXECUTION_GUIDANCE) > 100 +class TestParallelToolCallGuidance: + """Behavior contracts for the universal parallel-tool-call guidance block. + + Asserts the invariants the block must satisfy (steer batching, scope to + independent calls, stay short for the cached prompt) rather than freezing + its exact wording. + """ + + def test_is_nonempty_string(self): + assert isinstance(PARALLEL_TOOL_CALL_GUIDANCE, str) + assert PARALLEL_TOOL_CALL_GUIDANCE.strip() + + def test_steers_batching_into_one_response(self): + text = PARALLEL_TOOL_CALL_GUIDANCE.lower() + # Must tell the model to group independent calls together. + assert "single response" in text or "same" in text and "turn" in text + assert "independent" in text + + def test_carves_out_dependent_calls(self): + # Must NOT tell the model to batch dependent calls — that would break + # ordering (read-before-patch). The block has to acknowledge the + # serialize-when-dependent case. + text = PARALLEL_TOOL_CALL_GUIDANCE.lower() + assert "depend" in text + + def test_stays_short_for_cached_prompt(self): + # Shipped in every cached system prompt — keep it tight. The existing + # task-completion block is ~600 chars; allow generous headroom but + # guard against accidental essay growth. + assert len(PARALLEL_TOOL_CALL_GUIDANCE) < 900 + + def test_has_a_heading(self): + # Heading delimits it as its own section in the assembled prompt. + assert PARALLEL_TOOL_CALL_GUIDANCE.lstrip().startswith("#") + + + # ========================================================================= # Budget warning history stripping # ========================================================================= diff --git a/tests/tui_gateway/test_custom_provider_session_persistence.py b/tests/tui_gateway/test_custom_provider_session_persistence.py index eaa6d0b2111..f78a5d8c2fa 100644 --- a/tests/tui_gateway/test_custom_provider_session_persistence.py +++ b/tests/tui_gateway/test_custom_provider_session_persistence.py @@ -111,11 +111,11 @@ class TestRuntimeModelConfigPersistsEntryIdentity: assert _runtime_model_config(agent)["provider"] == "anthropic" -def _make_agent_with_override(override, monkeypatch, config): +def _make_agent_with_override(override, monkeypatch, config, model_cfg=None): """Run _make_agent through the REAL resolve_runtime_provider against a patched config, returning the kwargs AIAgent was constructed with.""" monkeypatch.setattr(rp, "load_config", lambda: config) - monkeypatch.setattr(rp, "_get_model_config", lambda: {}) + monkeypatch.setattr(rp, "_get_model_config", lambda: model_cfg or {}) # Keep credential-pool resolution off the developer's real HERMES home. monkeypatch.setattr(rp, "_try_resolve_from_custom_pool", lambda *a, **k: None) @@ -196,3 +196,159 @@ class TestResumeRoundTrip: assert kwargs["provider"] == "custom" assert kwargs["base_url"] == "http://127.0.0.1:8000/v1" assert kwargs["api_key"] == "no-key-required" + + +# --- Regression: bare "custom" WITHOUT a base_url (GH #44022 / #47714) ------ +# +# The recurring Desktop/TUI "No LLM provider configured" regression. Every +# point-fix above recovers the entry identity from the persisted base_url — +# but a session can be persisted/restored with bare ``provider="custom"`` and +# NO base_url (the agent was built without one on the override). Then bare +# "custom" leaked through verbatim, ``resolve_runtime_provider("custom")`` +# routed to the OpenRouter default URL with no api_key, and the next turn / +# resume failed with "No LLM provider configured". These tests lock the +# config-fallback recovery at all three leak sites so it cannot regress again. + +NAMED_CONFIG = { + "model": {"default": "mimo-v2.5-pro", "provider": "custom:mimo-v2.5-pro"}, + "custom_providers": [ + { + "name": "mimo-v2.5-pro", + "base_url": MIMO_URL, + "api_key": MIMO_KEY, + "api_mode": "chat_completions", + } + ], +} + + +class TestBareCustomNoBaseUrlHealsFromConfig: + """A named custom provider must never escape as bare ``"custom"`` when the + config identifies the active entry — even when no base_url survived.""" + + def test_canonical_identity_recovers_from_config_when_no_base_url( + self, monkeypatch + ): + monkeypatch.setattr(rp, "load_config", lambda: NAMED_CONFIG) + monkeypatch.setattr(rp, "_get_model_config", lambda: NAMED_CONFIG["model"]) + + # No base_url to reverse-lookup → must fall back to config.model.provider. + assert ( + rp.canonical_custom_identity(base_url=None) + == "custom:mimo-v2.5-pro" + ) + + def test_canonical_identity_returns_none_without_a_real_entry( + self, monkeypatch + ): + # config.model.provider is bare "custom" and no entry is named → no + # routable identity to recover; caller keeps its fallback behaviour. + monkeypatch.setattr(rp, "load_config", lambda: {}) + monkeypatch.setattr(rp, "_get_model_config", lambda: {"provider": "custom"}) + monkeypatch.delenv("HERMES_INFERENCE_PROVIDER", raising=False) + + assert rp.canonical_custom_identity(base_url=None) is None + + def test_persist_recovers_entry_when_agent_has_no_base_url(self, monkeypatch): + monkeypatch.setattr(rp, "load_config", lambda: NAMED_CONFIG) + monkeypatch.setattr(rp, "_get_model_config", lambda: NAMED_CONFIG["model"]) + + from tui_gateway.server import _runtime_model_config + + agent = _custom_agent(base_url="") # the regression vector + config = _runtime_model_config(agent) + + # Bare "custom" must NOT be persisted — it heals to the entry identity. + assert config["provider"] == "custom:mimo-v2.5-pro" + + def test_restore_heals_bare_custom_row_without_base_url(self, monkeypatch): + monkeypatch.setattr(rp, "load_config", lambda: NAMED_CONFIG) + monkeypatch.setattr(rp, "_get_model_config", lambda: NAMED_CONFIG["model"]) + + from tui_gateway.server import _stored_session_runtime_overrides + + # A poisoned row from before the fix: bare custom, no base_url. + row = { + "model": "mimo-v2.5-pro", + "model_config": json.dumps( + {"model": "mimo-v2.5-pro", "provider": "custom"} + ), + "billing_provider": "custom", + } + overrides = _stored_session_runtime_overrides(row) + + assert overrides["provider_override"] == "custom:mimo-v2.5-pro" + assert overrides["model_override"]["provider"] == "custom:mimo-v2.5-pro" + + def test_restore_drops_bare_custom_when_config_cannot_heal(self, monkeypatch): + """No recoverable identity: do NOT restore bare "custom" as a routable + override — leave it unset so resume falls back to the configured + default instead of the broken OpenRouter route.""" + monkeypatch.setattr(rp, "load_config", lambda: {}) + monkeypatch.setattr(rp, "_get_model_config", lambda: {}) + monkeypatch.delenv("HERMES_INFERENCE_PROVIDER", raising=False) + + from tui_gateway.server import _stored_session_runtime_overrides + + row = { + "model": "some-model", + "model_config": json.dumps( + {"model": "some-model", "provider": "custom"} + ), + "billing_provider": "custom", + } + overrides = _stored_session_runtime_overrides(row) + + assert "provider_override" not in overrides + assert overrides["model_override"]["provider"] is None + + def test_make_agent_heals_bare_custom_no_base_url_end_to_end(self, monkeypatch): + """The exact failing path: stored override has bare custom + no + base_url; _make_agent must build the AIAgent with the named entry's + endpoint + key, NOT the OpenRouter default with an empty key.""" + override = { + "model": "mimo-v2.5-pro", + "provider": "custom", + "base_url": None, + "api_mode": "chat_completions", + } + + kwargs = _make_agent_with_override( + override, monkeypatch, NAMED_CONFIG, model_cfg=NAMED_CONFIG["model"] + ) + + assert kwargs["base_url"] == MIMO_URL + assert kwargs["api_key"] == MIMO_KEY + assert "openrouter.ai" not in (kwargs.get("base_url") or "") + + def test_first_db_row_persists_entry_identity_not_bare_custom(self, monkeypatch): + """The ORIGIN of poisoned rows: a fresh desktop session's first DB + write (_ensure_session_db_row, before the agent is built) copies the + composer override's RESOLVED provider. A named custom provider's + resolved value is bare "custom" — persisting that verbatim seeds the + unresumable row. It must be healed to ``custom:`` here.""" + monkeypatch.setattr(rp, "load_config", lambda: NAMED_CONFIG) + monkeypatch.setattr(rp, "_get_model_config", lambda: NAMED_CONFIG["model"]) + + captured = {} + + class _DB: + def create_session(self, key, **kwargs): + captured.update(kwargs) + + from tui_gateway import server as srv + + monkeypatch.setattr(srv, "_get_db", lambda: _DB()) + monkeypatch.setattr(srv, "_resolve_model", lambda: "mimo-v2.5-pro") + + session = { + "session_key": "agent:main:desktop:dm:abc", + # composer override carrying the lossy resolved provider + no base_url + "model_override": {"model": "mimo-v2.5-pro", "provider": "custom"}, + } + srv._ensure_session_db_row(session) + + persisted = captured.get("model_config") or {} + assert persisted.get("provider") == "custom:mimo-v2.5-pro" + + diff --git a/tui_gateway/server.py b/tui_gateway/server.py index cc9399a7c2e..12ee450c6f5 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -1218,6 +1218,27 @@ def _ensure_session_db_row(session: dict) -> None: ): if val := override.get(src_key): model_config[cfg_key] = str(val) + # The composer override may carry the RESOLVED provider "custom" for a named + # ``providers:`` / ``custom_providers:`` entry. Persisting bare "custom" here + # (the very first DB write for a fresh desktop session, before the agent is + # built) is the origin of the recurring "No LLM provider configured" rows: + # on the next resume bare "custom" routes to OpenRouter with no key. Recover + # the durable ``custom:`` identity from the override's base_url, else + # the configured provider, so a routable identity is persisted from the + # start (matches _runtime_model_config's normalization). + if str(model_config.get("provider") or "").strip().lower() == "custom": + try: + from hermes_cli.runtime_provider import canonical_custom_identity + + healed = canonical_custom_identity( + base_url=model_config.get("base_url") or None + ) + if healed: + model_config["provider"] = healed + except Exception: + logger.debug( + "custom provider identity recovery failed (db row)", exc_info=True + ) if (reasoning := session.get("create_reasoning_override")) is not None: model_config["reasoning_config"] = reasoning if tier := session.get("create_service_tier_override"): @@ -1579,6 +1600,28 @@ def _stored_session_runtime_overrides(row: dict | None) -> dict: reasoning_config = model_config.get("reasoning_config") service_tier = str(model_config.get("service_tier") or "").strip() + # Heal a bare ``"custom"`` provider stored by an older build (or any leak + # site that bypassed _runtime_model_config's normalization). Bare custom is + # the resolved billing class, not a routable identity — restoring it as the + # session's provider override routes the resume to the OpenRouter default + # URL with no api_key, surfacing as "No LLM provider configured". Recover + # the durable ``custom:`` menu key from the stored base_url, falling + # back to the configured provider when the row has no base_url (the + # recurring Desktop/TUI regression vector). If neither names a real entry, + # drop the bare provider entirely so resume falls back to the configured + # default rather than the broken OpenRouter route. + if provider.strip().lower() == "custom": + healed = None + try: + from hermes_cli.runtime_provider import canonical_custom_identity + + healed = canonical_custom_identity(base_url=base_url or None) + except Exception: + logger.debug( + "custom provider identity recovery failed", exc_info=True + ) + provider = healed or ("" if not base_url else provider) + if model: # Use the same dict-shaped override that live /model switches use so a # DB-restored session can preserve custom endpoint metadata across both @@ -1613,21 +1656,27 @@ def _runtime_model_config(agent, existing: dict | None = None) -> dict: if model: config["model"] = model if provider: - if provider == "custom" and base_url: + if provider.strip().lower() == "custom": # ``agent.provider`` is the RESOLVED provider, and for any named # ``providers:`` / ``custom_providers:`` entry that is the literal # string "custom" — persisting it loses the entry identity, so a # later resume/rebuild cannot re-resolve the entry's credentials # (the api_key is deliberately never persisted; see # _stored_session_runtime_overrides). Recover the canonical - # ``custom:`` menu key from the endpoint URL so - # resolve_runtime_provider() can find the entry again. + # ``custom:`` menu key from the endpoint URL when present, + # else from the configured provider — this second fallback is the + # fix for sessions built WITHOUT a base_url on the override (the + # recurring Desktop/TUI "No LLM provider configured" regression: + # bare "custom" with no base_url was persisted verbatim and routed + # to OpenRouter with no key on the next resume). try: from hermes_cli.runtime_provider import ( - find_custom_provider_identity, + canonical_custom_identity, ) - provider = find_custom_provider_identity(base_url) or provider + provider = ( + canonical_custom_identity(base_url=base_url) or provider + ) except Exception: logger.debug( "custom provider identity lookup failed", exc_info=True @@ -3550,25 +3599,27 @@ def _make_agent( override_api_key = model_override.get("api_key") override_api_mode = model_override.get("api_mode") resolve_kwargs = {} - if ( - override_base_url - and str(requested_provider or "").strip().lower() == "custom" - ): + if str(requested_provider or "").strip().lower() == "custom": # Session rows persisted before the custom-provider identity fix # (see _runtime_model_config) stored the resolved provider # "custom", which _get_named_custom_provider cannot match back to # a named ``providers:`` / ``custom_providers:`` entry — the - # rebuild then either raised auth_unavailable or silently - # resolved placeholder credentials against the patched-back - # base_url. Recover the entry identity from the persisted - # base_url; failing that, hand the base_url to the direct-alias - # branch so pool/env credentials can still be resolved for it. - from hermes_cli.runtime_provider import find_custom_provider_identity + # rebuild then either raised auth_unavailable, silently resolved + # placeholder credentials against the patched-back base_url, or + # (when no base_url was stored) routed to the OpenRouter default + # with no key, surfacing as "No LLM provider configured". Recover + # the entry identity from the persisted base_url, falling back to + # the configured provider when the override carries no base_url + # (the recurring Desktop/TUI regression vector). + from hermes_cli.runtime_provider import canonical_custom_identity - recovered = find_custom_provider_identity(override_base_url) + recovered = canonical_custom_identity(base_url=override_base_url or None) if recovered: requested_provider = recovered - resolve_kwargs["explicit_base_url"] = override_base_url + if override_base_url: + # Failing identity recovery, still hand the base_url to the + # direct-alias branch so pool/env credentials resolve for it. + resolve_kwargs["explicit_base_url"] = override_base_url runtime = resolve_runtime_provider( requested=requested_provider, target_model=model or None,