diff --git a/tests/tools/test_delegate.py b/tests/tools/test_delegate.py index c27908da8f..6b4cc99150 100644 --- a/tests/tools/test_delegate.py +++ b/tests/tools/test_delegate.py @@ -568,6 +568,163 @@ class TestDelegateObservability(unittest.TestCase): self.assertEqual(result["results"][0]["exit_reason"], "max_iterations") +class TestSubagentCostRollup(unittest.TestCase): + """Port of Kilo-Org/kilocode#9448 — parent's session_estimated_cost_usd + must include subagent spend, not just the parent's own API calls.""" + + def _make_parent_with_cost_counters(self, depth=0, starting_cost=0.0): + parent = _make_mock_parent(depth=depth) + # The fields AIAgent exposes and the footer reads from. Set real + # floats/strings so the rollup can add to them rather than tripping + # on MagicMock auto-attrs. + parent.session_estimated_cost_usd = starting_cost + parent.session_cost_status = "unknown" + parent.session_cost_source = "none" + return parent + + def test_single_child_cost_folded_into_parent(self): + parent = self._make_parent_with_cost_counters(starting_cost=0.10) + + with patch("run_agent.AIAgent") as MockAgent: + mock_child = MagicMock() + mock_child.model = "claude-sonnet-4-6" + mock_child.session_prompt_tokens = 1000 + mock_child.session_completion_tokens = 200 + mock_child.session_estimated_cost_usd = 0.42 + mock_child.run_conversation.return_value = { + "final_response": "done", + "completed": True, + "interrupted": False, + "api_calls": 2, + "messages": [], + } + MockAgent.return_value = mock_child + + result = json.loads(delegate_task(goal="do stuff", parent_agent=parent)) + + # Parent footer must reflect parent_cost + child_cost. + self.assertAlmostEqual(parent.session_estimated_cost_usd, 0.52, places=6) + # Rollup must strip the internal field before serialising to the model. 
+ self.assertNotIn("_child_cost_usd", result["results"][0]) + self.assertNotIn("_child_role", result["results"][0]) + + def test_batch_children_costs_sum_into_parent(self): + parent = self._make_parent_with_cost_counters(starting_cost=0.00) + + with patch("tools.delegate_tool._run_single_child") as mock_run: + mock_run.side_effect = [ + { + "task_index": 0, + "status": "completed", + "summary": "A", + "api_calls": 2, + "duration_seconds": 1.0, + "_child_role": "leaf", + "_child_cost_usd": 0.15, + }, + { + "task_index": 1, + "status": "completed", + "summary": "B", + "api_calls": 2, + "duration_seconds": 1.0, + "_child_role": "leaf", + "_child_cost_usd": 0.27, + }, + { + "task_index": 2, + "status": "failed", + "summary": "", + "error": "boom", + "api_calls": 0, + "duration_seconds": 0.1, + "_child_role": "leaf", + "_child_cost_usd": 0.03, + }, + ] + result = json.loads( + delegate_task( + tasks=[{"goal": "A"}, {"goal": "B"}, {"goal": "C"}], + parent_agent=parent, + ) + ) + + # 0.15 + 0.27 + 0.03 even though one child failed — the API calls it + # made before failing still cost money. + self.assertAlmostEqual(parent.session_estimated_cost_usd, 0.45, places=6) + # cost_source promoted from "none" since the parent had no direct spend. + self.assertEqual(parent.session_cost_source, "subagent") + self.assertEqual(parent.session_cost_status, "estimated") + # All internal fields stripped from results. + for entry in result["results"]: + self.assertNotIn("_child_cost_usd", entry) + self.assertNotIn("_child_role", entry) + + def test_zero_cost_children_leave_parent_source_untouched(self): + """If every child reports 0 cost (e.g. 
free local model), we should + not invent a fake 'subagent' source — the parent's 'none' stays.""" + parent = self._make_parent_with_cost_counters(starting_cost=0.00) + + with patch("tools.delegate_tool._run_single_child") as mock_run: + mock_run.return_value = { + "task_index": 0, + "status": "completed", + "summary": "done", + "api_calls": 1, + "duration_seconds": 0.5, + "_child_role": "leaf", + "_child_cost_usd": 0.0, + } + delegate_task(goal="free local run", parent_agent=parent) + + self.assertEqual(parent.session_estimated_cost_usd, 0.0) + self.assertEqual(parent.session_cost_source, "none") + + def test_parent_with_real_source_not_overwritten(self): + """If the parent already has its own cost billed (cost_source != 'none'), + adding subagent cost must not clobber the existing source label.""" + parent = self._make_parent_with_cost_counters(starting_cost=0.20) + parent.session_cost_status = "exact" + parent.session_cost_source = "openrouter" + + with patch("tools.delegate_tool._run_single_child") as mock_run: + mock_run.return_value = { + "task_index": 0, + "status": "completed", + "summary": "done", + "api_calls": 1, + "duration_seconds": 0.5, + "_child_role": "leaf", + "_child_cost_usd": 0.30, + } + delegate_task(goal="billed run", parent_agent=parent) + + self.assertAlmostEqual(parent.session_estimated_cost_usd, 0.50, places=6) + # Real source label preserved. + self.assertEqual(parent.session_cost_source, "openrouter") + self.assertEqual(parent.session_cost_status, "exact") + + def test_rollup_tolerates_missing_cost_fields(self): + """Older fixtures / fabricated error entries may not carry + _child_cost_usd. 
Rollup must degrade to zero-add silently.""" + parent = self._make_parent_with_cost_counters(starting_cost=0.10) + + with patch("tools.delegate_tool._run_single_child") as mock_run: + mock_run.return_value = { + "task_index": 0, + "status": "completed", + "summary": "done", + "api_calls": 1, + "duration_seconds": 0.5, + # no _child_role, no _child_cost_usd + } + result = json.loads(delegate_task(goal="legacy", parent_agent=parent)) + + # Parent cost unchanged. + self.assertEqual(parent.session_estimated_cost_usd, 0.10) + self.assertEqual(len(result["results"]), 1) + + class TestBlockedTools(unittest.TestCase): def test_blocked_tools_constant(self): for tool in ["delegate_task", "clarify", "memory", "send_message", "execute_code"]: diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index 397b7c958b..bceb9833c7 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -1616,6 +1616,19 @@ def _run_single_child( # parent thread can fire subagent_stop with the correct role. # Stripped before the dict is serialised back to the model. "_child_role": getattr(child, "_delegate_role", None), + # Captured before child.close() so the parent aggregator can fold + # the child's total spend into the parent's session cost. Port of + # Kilo-Org/kilocode#9448 — previously the footer only reflected the + # parent's direct API calls and under-counted subagent-heavy runs. + # Stripped before the dict is serialised back to the model. 
+ "_child_cost_usd": ( + float(getattr(child, "session_estimated_cost_usd", 0.0) or 0.0) + if isinstance( + getattr(child, "session_estimated_cost_usd", 0.0), + (int, float), + ) + else 0.0 + ), } if status == "failed": entry["error"] = result.get("error", "Subagent did not produce a response.") @@ -2112,8 +2125,20 @@ def delegate_task( from hermes_cli.plugins import invoke_hook as _invoke_hook except Exception: _invoke_hook = None + # Aggregate child spend here so the parent's footer/UI reflect the true + # cost of a subagent-heavy turn. Port of Kilo-Org/kilocode#9448. Each + # child's cost was captured in _run_single_child before its AIAgent was + # closed; we fold them into the parent in one pass alongside the + # subagent_stop hook loop so we don't walk `results` twice. + _children_cost_total = 0.0 for entry in results: child_role = entry.pop("_child_role", None) + child_cost = entry.pop("_child_cost_usd", 0.0) + try: + if child_cost: + _children_cost_total += float(child_cost) + except (TypeError, ValueError): + pass if _invoke_hook is None: continue try: @@ -2128,6 +2153,28 @@ def delegate_task( except Exception: logger.debug("subagent_stop hook invocation failed", exc_info=True) + # Fold the aggregated child cost into the parent's session total. This is + # additive — each delegate_task call contributes its own children — so + # nested orchestrator→worker trees roll up naturally: each layer's own + # delegate_task() folds its direct children in, and when the orchestrator + # itself finishes, its parent folds the orchestrator's now-inflated total + # on top. Degrades silently if the parent lacks the counter (older test + # fixtures, etc.). 
+ if _children_cost_total > 0.0: + try: + current = float(getattr(parent_agent, "session_estimated_cost_usd", 0.0) or 0.0) + parent_agent.session_estimated_cost_usd = current + _children_cost_total + # Upgrade the cost_source so the UI doesn't label a partially-real + # total as "none" when the parent itself hadn't billed any calls + # yet (rare but possible when the parent's only action this turn + # was delegate_task). + if getattr(parent_agent, "session_cost_source", "none") in (None, "", "none"): + parent_agent.session_cost_source = "subagent" + if getattr(parent_agent, "session_cost_status", "unknown") in (None, "", "unknown"): + parent_agent.session_cost_status = "estimated" + except Exception: + logger.debug("Subagent cost rollup failed", exc_info=True) + total_duration = round(time.monotonic() - overall_start, 2) return json.dumps( diff --git a/web/src/i18n/en.ts b/web/src/i18n/en.ts index 5a50e1a289..bf8b34356a 100644 --- a/web/src/i18n/en.ts +++ b/web/src/i18n/en.ts @@ -237,6 +237,9 @@ export const en: Translations = { exportConfig: "Export config as JSON", importConfig: "Import config from JSON", resetDefaults: "Reset to defaults", + resetScopeTooltip: "Reset {scope} to defaults", + confirmResetScope: "Reset all {scope} settings to their defaults? 
This only updates the form — changes aren't written to config.yaml until you press Save.", + resetScopeToast: "{scope} reset to defaults — review and Save to persist", rawYaml: "Raw YAML Configuration", searchResults: "Search Results", fields: "field{s}", diff --git a/web/src/i18n/types.ts b/web/src/i18n/types.ts index ab267933bb..718115e975 100644 --- a/web/src/i18n/types.ts +++ b/web/src/i18n/types.ts @@ -242,6 +242,9 @@ export interface Translations { exportConfig: string; importConfig: string; resetDefaults: string; + resetScopeTooltip: string; + confirmResetScope: string; + resetScopeToast: string; rawYaml: string; searchResults: string; fields: string; diff --git a/web/src/i18n/zh.ts b/web/src/i18n/zh.ts index dc67cd8215..ff8f3a2798 100644 --- a/web/src/i18n/zh.ts +++ b/web/src/i18n/zh.ts @@ -234,6 +234,9 @@ export const zh: Translations = { exportConfig: "导出配置为 JSON", importConfig: "从 JSON 导入配置", resetDefaults: "恢复默认值", + resetScopeTooltip: "将{scope}恢复为默认值", + confirmResetScope: "确定要将{scope}的所有设置恢复为默认值吗?此操作仅更新表单,在按下「保存」按钮前不会写入 config.yaml。", + resetScopeToast: "{scope}已恢复为默认值 — 请检查并保存以生效", rawYaml: "原始 YAML 配置", searchResults: "搜索结果", fields: "个字段", diff --git a/web/src/pages/ConfigPage.tsx b/web/src/pages/ConfigPage.tsx index dcd387a922..8705ac4c5d 100644 --- a/web/src/pages/ConfigPage.tsx +++ b/web/src/pages/ConfigPage.tsx @@ -228,7 +228,26 @@ export default function ConfigPage() { }; const handleReset = () => { - if (defaults) setConfig(structuredClone(defaults)); + if (!defaults || !config) return; + // Scope the reset to what the user is currently looking at: + // - search mode → the matched fields + // - form mode → the active category's fields + // Resetting the whole config here was a footgun (issue reported by @ykmfb001): + // the button sits next to the category tabs and users reasonably assumed + // "reset this tab", not "wipe my entire config.yaml". + const scopedFields = isSearching ? 
searchMatchedFields : activeFields; + if (scopedFields.length === 0) return; + const scopeLabel = isSearching + ? t.config.searchResults + : prettyCategoryName(activeCategory); + const message = t.config.confirmResetScope.replace("{scope}", scopeLabel); + if (!window.confirm(message)) return; + let next: Record<string, unknown> = config; + for (const [key] of scopedFields) { + next = setNestedValue(next, key, getNestedValue(defaults, key)); + } + setConfig(next); + showToast(t.config.resetScopeToast.replace("{scope}", scopeLabel), "success"); }; const handleExport = () => { @@ -333,9 +352,17 @@ export default function ConfigPage() { - + {!yamlMode && (() => { + const resetScopeLabel = isSearching + ? t.config.searchResults + : prettyCategoryName(activeCategory); + const resetTitle = t.config.resetScopeTooltip.replace("{scope}", resetScopeLabel); + return ( + + ); + })()}