diff --git a/run_agent.py b/run_agent.py index c1f2623c83..1fc3821194 100644 --- a/run_agent.py +++ b/run_agent.py @@ -293,6 +293,13 @@ class AIAgent: self._use_prompt_caching = is_openrouter and is_claude self._cache_ttl = "5m" # Default 5-minute TTL (1.25x write cost) + # Iteration budget pressure: warn the LLM as it approaches max_iterations. + # Warnings are injected into the last tool result JSON (not as separate + # messages) so they don't break message structure or invalidate caching. + self._budget_caution_threshold = 0.7 # 70% — nudge to start wrapping up + self._budget_warning_threshold = 0.9 # 90% — urgent, respond now + self._budget_pressure_enabled = True + # Persistent error log -- always writes WARNING+ to ~/.hermes/logs/errors.log # so tool failures, API errors, etc. are inspectable after the fact. from agent.redact import RedactingFormatter @@ -2655,7 +2662,7 @@ class AIAgent: return compressed, new_system_prompt - def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str) -> None: + def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None: """Execute tool calls from the assistant message and append results to messages.""" for i, tool_call in enumerate(assistant_message.tool_calls, 1): # SAFETY: check interrupt BEFORE starting each tool. @@ -2885,6 +2892,51 @@ class AIAgent: if self.tool_delay > 0 and i < len(assistant_message.tool_calls): time.sleep(self.tool_delay) + # ── Budget pressure injection ───────────────────────────────── + # After all tool calls in this turn are processed, check if we're + # approaching max_iterations. If so, inject a warning into the LAST + # tool result's JSON so the LLM sees it naturally when reading results. + budget_warning = self._get_budget_warning(api_call_count) + if budget_warning and messages and messages[-1].get("role") == "tool": + last_content = messages[-1]["content"] + try: + parsed = json.loads(last_content) + if isinstance(parsed, dict): + parsed["_budget_warning"] = budget_warning + messages[-1]["content"] = json.dumps(parsed, ensure_ascii=False) + else: + messages[-1]["content"] = last_content + f"\n\n{budget_warning}" + except (json.JSONDecodeError, TypeError): + messages[-1]["content"] = last_content + f"\n\n{budget_warning}" + if not self.quiet_mode: + remaining = self.max_iterations - api_call_count + tier = "⚠️ WARNING" if remaining <= self.max_iterations * 0.1 else "💡 CAUTION" + print(f"{self.log_prefix}{tier}: {remaining} iterations remaining") + + def _get_budget_warning(self, api_call_count: int) -> Optional[str]: + """Return a budget pressure string, or None if not yet needed. + + Two-tier system: + - Caution (70%): nudge to consolidate work + - Warning (90%): urgent, must respond now + """ + if not self._budget_pressure_enabled or self.max_iterations <= 0: + return None + progress = api_call_count / self.max_iterations + remaining = self.max_iterations - api_call_count + if progress >= self._budget_warning_threshold: + return ( + f"[BUDGET WARNING: Iteration {api_call_count}/{self.max_iterations}. " + f"Only {remaining} iteration(s) left. " + "Provide your final response NOW. No more tool calls unless absolutely critical.]" + ) + if progress >= self._budget_caution_threshold: + return ( + f"[BUDGET: Iteration {api_call_count}/{self.max_iterations}. " + f"{remaining} iterations left. Start consolidating your work.]" + ) + return None + def _handle_max_iterations(self, messages: list, api_call_count: int) -> str: """Request a summary when max iterations are reached. Returns the final response text.""" print(f"⚠️ Reached maximum iterations ({self.max_iterations}). Requesting summary...") @@ -4064,7 +4116,7 @@ class AIAgent: messages.append(assistant_msg) self._log_msg_to_db(assistant_msg) - self._execute_tool_calls(assistant_message, messages, effective_task_id) + self._execute_tool_calls(assistant_message, messages, effective_task_id, api_call_count) # Refund the iteration if the ONLY tool(s) called were # execute_code (programmatic tool calling). These are diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py index 64de980d59..45b0b42473 100644 --- a/tests/test_run_agent.py +++ b/tests/test_run_agent.py @@ -1173,3 +1173,78 @@ class TestSystemPromptStability: conversation_history = [] should_prefetch = not conversation_history assert should_prefetch is True + + +# --------------------------------------------------------------------------- +# Iteration budget pressure warnings +# --------------------------------------------------------------------------- + +class TestBudgetPressure: + """Budget pressure warning system (issue #414).""" + + def test_no_warning_below_caution(self, agent): + agent.max_iterations = 60 + assert agent._get_budget_warning(30) is None + + def test_caution_at_70_percent(self, agent): + agent.max_iterations = 60 + msg = agent._get_budget_warning(42) + assert msg is not None + assert "[BUDGET:" in msg + assert "18 iterations left" in msg + + def test_warning_at_90_percent(self, agent): + agent.max_iterations = 60 + msg = agent._get_budget_warning(54) + assert "[BUDGET WARNING:" in msg + assert "Provide your final response NOW" in msg + + def test_last_iteration(self, agent): + agent.max_iterations = 60 + msg = agent._get_budget_warning(59) + assert "1 iteration(s) left" in msg + + def test_disabled(self, agent): + agent.max_iterations = 60 + agent._budget_pressure_enabled = False + assert agent._get_budget_warning(55) is None + + def test_zero_max_iterations(self, agent): + agent.max_iterations = 0 + assert agent._get_budget_warning(0) is None + + def test_injects_into_json_tool_result(self, agent): + """Warning should be injected as _budget_warning field in JSON tool results.""" + import json + agent.max_iterations = 10 + messages = [ + {"role": "tool", "content": json.dumps({"output": "done", "exit_code": 0}), "tool_call_id": "tc1"} + ] + warning = agent._get_budget_warning(9) + assert warning is not None + # Simulate the injection logic + last_content = messages[-1]["content"] + parsed = json.loads(last_content) + parsed["_budget_warning"] = warning + messages[-1]["content"] = json.dumps(parsed, ensure_ascii=False) + result = json.loads(messages[-1]["content"]) + assert "_budget_warning" in result + assert "BUDGET WARNING" in result["_budget_warning"] + assert result["output"] == "done" # original content preserved + + def test_appends_to_non_json_tool_result(self, agent): + """Warning should be appended as text for non-JSON tool results.""" + agent.max_iterations = 10 + messages = [ + {"role": "tool", "content": "plain text result", "tool_call_id": "tc1"} + ] + warning = agent._get_budget_warning(9) + # Simulate injection logic for non-JSON + last_content = messages[-1]["content"] + try: + import json + json.loads(last_content) + except (json.JSONDecodeError, TypeError): + messages[-1]["content"] = last_content + f"\n\n{warning}" + assert "plain text result" in messages[-1]["content"] + assert "BUDGET WARNING" in messages[-1]["content"]