diff --git a/run_agent.py b/run_agent.py index ecaa92b41..2fed5c36c 100644 --- a/run_agent.py +++ b/run_agent.py @@ -339,10 +339,7 @@ def _paths_overlap(left: Path, right: Path) -> bool: _SURROGATE_RE = re.compile(r'[\ud800-\udfff]') -_BUDGET_WARNING_RE = re.compile( - r"\[BUDGET(?:\s+WARNING)?:\s+Iteration\s+\d+/\d+\..*?\]", - re.DOTALL, -) + def _sanitize_surrogates(text: str) -> str: @@ -463,34 +460,7 @@ def _sanitize_messages_non_ascii(messages: list) -> bool: return found -def _strip_budget_warnings_from_history(messages: list) -> None: - """Remove budget pressure warnings from tool-result messages in-place. - Budget warnings are turn-scoped signals that must not leak into replayed - history. They live in tool-result ``content`` either as a JSON key - (``_budget_warning``) or appended plain text. - """ - for msg in messages: - if not isinstance(msg, dict) or msg.get("role") != "tool": - continue - content = msg.get("content") - if not isinstance(content, str) or "_budget_warning" not in content and "[BUDGET" not in content: - continue - - # Try JSON first (the common case: _budget_warning key in a dict) - try: - parsed = json.loads(content) - if isinstance(parsed, dict) and "_budget_warning" in parsed: - del parsed["_budget_warning"] - msg["content"] = json.dumps(parsed, ensure_ascii=False) - continue - except (json.JSONDecodeError, TypeError): - pass - - # Fallback: strip the text pattern from plain-text tool results - cleaned = _BUDGET_WARNING_RE.sub("", content).strip() - if cleaned != content: - msg["content"] = cleaned # ========================================================================= @@ -6989,24 +6959,6 @@ class AIAgent: turn_tool_msgs = messages[-num_tools:] enforce_turn_budget(turn_tool_msgs, env=get_active_env(effective_task_id)) - # ── Budget pressure injection ──────────────────────────────────── - budget_warning = self._get_budget_warning(api_call_count) - if budget_warning and messages and messages[-1].get("role") == "tool": - last_content = messages[-1]["content"] - try: - parsed = json.loads(last_content) - if isinstance(parsed, dict): - parsed["_budget_warning"] = budget_warning - messages[-1]["content"] = json.dumps(parsed, ensure_ascii=False) - else: - messages[-1]["content"] = last_content + f"\n\n{budget_warning}" - except (json.JSONDecodeError, TypeError): - messages[-1]["content"] = last_content + f"\n\n{budget_warning}" - if not self.quiet_mode: - remaining = self.max_iterations - api_call_count - tier = "⚠️ WARNING" if remaining <= self.max_iterations * 0.1 else "💡 CAUTION" - print(f"{self.log_prefix}{tier}: {remaining} iterations remaining") - def _execute_tool_calls_sequential(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None: """Execute tool calls sequentially (original behavior). Used for single calls or interactive tools.""" for i, tool_call in enumerate(assistant_message.tool_calls, 1): @@ -7353,36 +7305,7 @@ class AIAgent: if num_tools_seq > 0: enforce_turn_budget(messages[-num_tools_seq:], env=get_active_env(effective_task_id)) - # ── Budget pressure injection ───────────────────────────────── - # After all tool calls in this turn are processed, check if we're - # approaching max_iterations. If so, inject a warning into the LAST - # tool result's JSON so the LLM sees it naturally when reading results. - budget_warning = self._get_budget_warning(api_call_count) - if budget_warning and messages and messages[-1].get("role") == "tool": - last_content = messages[-1]["content"] - try: - parsed = json.loads(last_content) - if isinstance(parsed, dict): - parsed["_budget_warning"] = budget_warning - messages[-1]["content"] = json.dumps(parsed, ensure_ascii=False) - else: - messages[-1]["content"] = last_content + f"\n\n{budget_warning}" - except (json.JSONDecodeError, TypeError): - messages[-1]["content"] = last_content + f"\n\n{budget_warning}" - if not self.quiet_mode: - remaining = self.max_iterations - api_call_count - tier = "⚠️ WARNING" if remaining <= self.max_iterations * 0.1 else "💡 CAUTION" - print(f"{self.log_prefix}{tier}: {remaining} iterations remaining") - def _get_budget_warning(self, api_call_count: int) -> Optional[str]: - """Return a budget pressure string, or None if not yet needed. - - Only fires once the iteration budget is fully exhausted. No - intermediate warnings — those caused models to abandon complex - tasks prematurely. - """ - # Never inject warnings during the normal run - return None def _emit_context_pressure(self, compaction_progress: float, compressor) -> None: """Notify the user that context is approaching the compaction threshold. @@ -7675,14 +7598,6 @@ class AIAgent: # Initialize conversation (copy to avoid mutating the caller's list) messages = list(conversation_history) if conversation_history else [] - # Strip budget pressure warnings from previous turns. These are - # turn-scoped signals injected by _get_budget_warning() into tool - # result content. If left in the replayed history, models (especially - # GPT-family) interpret them as still-active instructions and avoid - # making tool calls in ALL subsequent turns. - if messages: - _strip_budget_warnings_from_history(messages) - # Hydrate todo store from conversation history (gateway creates a fresh # AIAgent per message, so the in-memory store is empty -- we need to # recover the todo state from the most recent todo tool response in history) diff --git a/tests/agent/test_prompt_builder.py b/tests/agent/test_prompt_builder.py index 3b6a4c3ec..1f2f6ada7 100644 --- a/tests/agent/test_prompt_builder.py +++ b/tests/agent/test_prompt_builder.py @@ -1009,65 +1009,4 @@ class TestOpenAIModelExecutionGuidance: # ========================================================================= -class TestStripBudgetWarningsFromHistory: - def test_strips_json_budget_warning_key(self): - import json - from run_agent import _strip_budget_warnings_from_history - messages = [ - {"role": "tool", "tool_call_id": "c1", "content": json.dumps({ - "output": "hello", - "exit_code": 0, - "_budget_warning": "[BUDGET: Iteration 55/60. 5 iterations left. Start consolidating your work.]", - })}, - ] - _strip_budget_warnings_from_history(messages) - parsed = json.loads(messages[0]["content"]) - assert "_budget_warning" not in parsed - assert parsed["output"] == "hello" - assert parsed["exit_code"] == 0 - - def test_strips_text_budget_warning(self): - from run_agent import _strip_budget_warnings_from_history - - messages = [ - {"role": "tool", "tool_call_id": "c1", - "content": "some result\n\n[BUDGET WARNING: Iteration 58/60. Only 2 iteration(s) left. Provide your final response NOW. No more tool calls unless absolutely critical.]"}, - ] - _strip_budget_warnings_from_history(messages) - assert messages[0]["content"] == "some result" - - def test_leaves_non_tool_messages_unchanged(self): - from run_agent import _strip_budget_warnings_from_history - - messages = [ - {"role": "assistant", "content": "[BUDGET WARNING: Iteration 58/60. Only 2 iteration(s) left. Provide your final response NOW. No more tool calls unless absolutely critical.]"}, - {"role": "user", "content": "hello"}, - ] - original_contents = [m["content"] for m in messages] - _strip_budget_warnings_from_history(messages) - assert [m["content"] for m in messages] == original_contents - - def test_handles_empty_and_missing_content(self): - from run_agent import _strip_budget_warnings_from_history - - messages = [ - {"role": "tool", "tool_call_id": "c1", "content": ""}, - {"role": "tool", "tool_call_id": "c2"}, - ] - _strip_budget_warnings_from_history(messages) - assert messages[0]["content"] == "" - - def test_strips_caution_variant(self): - import json - from run_agent import _strip_budget_warnings_from_history - - messages = [ - {"role": "tool", "tool_call_id": "c1", "content": json.dumps({ - "output": "ok", - "_budget_warning": "[BUDGET: Iteration 42/60. 18 iterations left. Start consolidating your work.]", - })}, - ] - _strip_budget_warnings_from_history(messages) - parsed = json.loads(messages[0]["content"]) - assert "_budget_warning" not in parsed diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index d19220e50..d716b59b2 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -2742,19 +2742,7 @@ class TestSystemPromptStability: assert "Hermes Agent" in agent._cached_system_prompt class TestBudgetPressure: - """Budget pressure warning system — now only fires at budget exhaustion.""" - - def test_no_intermediate_warnings(self, agent): - """No warnings at 70% or 90% — only at actual exhaustion.""" - agent.max_iterations = 60 - assert agent._get_budget_warning(30) is None # 50% - assert agent._get_budget_warning(42) is None # 70% - assert agent._get_budget_warning(54) is None # 90% - assert agent._get_budget_warning(59) is None # last iteration - - def test_zero_max_iterations(self, agent): - agent.max_iterations = 0 - assert agent._get_budget_warning(0) is None + """Budget exhaustion grace call system.""" def test_grace_call_flags_initialized(self, agent): """Agent should have budget grace call flags."""