diff --git a/agent/context_compressor.py b/agent/context_compressor.py index a681b0c6b..d6ad52e0d 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -31,6 +31,7 @@ from agent.model_metadata import ( get_model_context_length, estimate_messages_tokens_rough, ) +from agent.redact import redact_sensitive_text logger = logging.getLogger(__name__) @@ -593,7 +594,13 @@ class ContextCompressor(ContextEngine): content = content[:self._CONTENT_HEAD] + "\n...[truncated]...\n" + content[-self._CONTENT_TAIL:] parts.append(f"[{role.upper()}]: {content}") - return "\n\n".join(parts) + # Scrub credential-like values before sending to the summarizer. + # The summarizer is instructed to preserve "specific values" so raw + # API keys, bearer tokens, or env-var assignments that leak in via + # tool output (terminal, file_read, curl -v) would otherwise be + # copied verbatim into the persistent summary and re-injected on + # every subsequent compaction. Ported from openclaw/openclaw#67801. + return redact_sensitive_text("\n\n".join(parts)) def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]], focus_topic: str = None) -> Optional[str]: """Generate a structured summary of conversation turns. @@ -699,13 +706,17 @@ Target ~{summary_budget} tokens. Be CONCRETE — include file paths, command out Write only the summary body. Do not include any preamble or prefix.""" if self._previous_summary: - # Iterative update: preserve existing info, add new progress + # Iterative update: preserve existing info, add new progress. + # Re-scrub the previous summary in case it was produced before + # output-side redaction was added or restored from older session + # state. (Idempotent on already-clean text.) + previous_summary_clean = redact_sensitive_text(self._previous_summary) prompt = f"""{_summarizer_preamble} You are updating a context compaction summary. A previous compaction produced the summary below. New conversation turns have occurred since then and need to be incorporated. PREVIOUS SUMMARY: -{self._previous_summary} +{previous_summary_clean} NEW TURNS TO INCORPORATE: {content_to_summarize} @@ -756,6 +767,12 @@ The user has requested that this compaction PRIORITISE preserving all informatio if not isinstance(content, str): content = str(content) if content else "" summary = content.strip() + # Defense-in-depth: scrub any credential-like values that the + # summarizer may have echoed back from the input. Input is already + # scrubbed in _serialize_for_summary, but a poorly-behaved + # summarizer model could paraphrase a secret ("the API key was + # sk-..."). Ported from openclaw/openclaw#67801. + summary = redact_sensitive_text(summary) # Store for iterative updates on next compaction self._previous_summary = summary self._summary_failure_cooldown_until = 0.0 diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py index 0c20dddcd..652c4e3e9 100644 --- a/tests/agent/test_context_compressor.py +++ b/tests/agent/test_context_compressor.py @@ -905,3 +905,102 @@ class TestTruncateToolCallArgsJson: parsed = _json.loads(shrunk) assert parsed["path"] == "~/.hermes/skills/shopping/browser-setup-notes.md" assert parsed["content"].endswith("...[truncated]") + + +class TestSerializationRedaction: + """Regression tests for the openclaw/openclaw#67801 port. + + The summarizer is instructed to preserve specific values, so credential-like + strings surfaced through tool output (e.g. echo env vars, curl -v, reading + a .env file) must be scrubbed before they reach the summary prompt — + otherwise they get copied verbatim into the persistent summary and + re-injected on every subsequent compaction. + """ + + def test_api_key_prefix_redacted_from_tool_result(self, compressor): + secret = "sk-proj-abc123DEADBEEFdef456GHIJKL789mnop0123QRSTUVwxYZ" + turns = [ + {"role": "user", "content": "show me the openai key"}, + {"role": "assistant", "content": None, "tool_calls": [ + {"id": "c1", "type": "function", + "function": {"name": "terminal", + "arguments": '{"command": "echo $OPENAI_API_KEY"}'}}, + ]}, + {"role": "tool", "tool_call_id": "c1", "content": secret}, + {"role": "user", "content": "thanks"}, + ] + serialized = compressor._serialize_for_summary(turns) + assert secret not in serialized + # At least one form of masked output should remain; redact never + # removes everything — it replaces with a masked form. + assert len(serialized) > 0 + + def test_env_assignment_redacted(self, compressor): + secret = "sk-verysecretvalue123456789abcdef" + turns = [ + {"role": "tool", "tool_call_id": "c1", + "content": f"OPENAI_API_KEY={secret}\nOTHER_VAR=harmless"}, + ] + serialized = compressor._serialize_for_summary(turns) + assert secret not in serialized + assert "OPENAI_API_KEY=" in serialized + + def test_authorization_header_redacted(self, compressor): + secret = "ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZ123456" + turns = [ + {"role": "tool", "tool_call_id": "c1", + "content": f"curl -H 'Authorization: Bearer {secret}' https://api.github.com"}, + ] + serialized = compressor._serialize_for_summary(turns) + assert secret not in serialized + + def test_json_api_key_field_redacted(self, compressor): + secret = "xoxb-11111-22222-deadbeefcafebabefeed" + turns = [ + {"role": "tool", "tool_call_id": "c1", + "content": '{"apiKey": "' + secret + '"}'}, + ] + serialized = compressor._serialize_for_summary(turns) + assert secret not in serialized + + def test_non_secret_content_preserved(self, compressor): + """Redaction must not damage legitimate content — file paths, UUIDs, + port numbers, error messages should all survive.""" + turns = [ + {"role": "user", "content": "fix the bug at /home/user/repo/src/main.py:42"}, + {"role": "assistant", "content": + "Fixed. The UUID 550e8400-e29b-41d4-a716-446655440000 is now " + "correctly handled. Server listens on 127.0.0.1:8080."}, + {"role": "tool", "tool_call_id": "c1", + "content": "ImportError: No module named 'foo'"}, + ] + serialized = compressor._serialize_for_summary(turns) + assert "/home/user/repo/src/main.py:42" in serialized + assert "550e8400-e29b-41d4-a716-446655440000" in serialized + assert "127.0.0.1:8080" in serialized + assert "ImportError: No module named 'foo'" in serialized + + def test_stored_summary_is_redacted(self, compressor): + """If the summarizer echoes a secret back, the stored summary must be + scrubbed before being retained in _previous_summary.""" + secret = "sk-leakedfromsummarizer9876543210" + mock_response = MagicMock() + mock_response.choices = [MagicMock()] + mock_response.choices[0].message = MagicMock() + mock_response.choices[0].message.content = ( + f"The user set OPENAI_API_KEY={secret} and ran the script." + ) + mock_response.usage = None + + fake_client = MagicMock() + fake_client.chat.completions.create.return_value = mock_response + compressor.client = fake_client + + turns = [ + {"role": "user", "content": "set up the key"}, + {"role": "assistant", "content": "done"}, + ] + summary = compressor._generate_summary(turns) + assert summary is not None + assert secret not in summary + assert secret not in (compressor._previous_summary or "")