diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index d16236737c4..99012c73c1b 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -405,7 +405,7 @@ class ContextCompressor(ContextEngine):
         self,
         model: str,
         threshold_percent: float = 0.50,
-        protect_first_n: int = 3,
+        protect_first_n: int = 2,
         protect_last_n: int = 20,
         summary_target_ratio: float = 0.20,
         quiet_mode: bool = False,
@@ -1185,6 +1185,26 @@ The user has requested that this compaction PRIORITISE preserving all informatio
             idx += 1
         return idx
 
+    def _protect_head_size(self, messages: List[Dict[str, Any]]) -> int:
+        """Return how many leading messages are protected from compression.
+
+        ``protect_first_n`` counts messages protected *in addition to* the
+        system prompt.  A system message at index 0 always adds one to the
+        result, so the setting means the same thing whether or not the
+        caller included the system prompt in ``messages``.
+
+        A negative ``protect_first_n`` is clamped to 0: without the clamp,
+        -1 would cancel the system-prompt slot (or yield a negative start
+        index) and could expose the system prompt to summarisation.
+
+        Examples:
+          protect_first_n=0 → system prompt only (or nothing if no system msg)
+          protect_first_n=3 → system + first 3 non-system messages
+        """
+        has_system = bool(messages) and messages[0].get("role") == "system"
+        # bool is an int subclass, so True contributes exactly one slot.
+        return int(has_system) + max(0, self.protect_first_n)
+
     def _align_boundary_backward(self, messages: List[Dict[str, Any]], idx: int) -> int:
         """Pull a compress-end boundary backward to avoid splitting a
         tool_call / result group.
@@ -1343,7 +1363,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
         skip the LLM call when the transcript is still entirely inside
         the protected head/tail.
         """
-        compress_start = self._align_boundary_forward(messages, self.protect_first_n)
+        compress_start = self._align_boundary_forward(messages, self._protect_head_size(messages))
         compress_end = self._find_tail_cut_by_tokens(messages, compress_start)
         return compress_start < compress_end
 
@@ -1379,7 +1399,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
         self._last_aux_model_failure_model = None
         n_messages = len(messages)
         # Only need head + 3 tail messages minimum (token budget decides the real tail size)
-        _min_for_compress = self.protect_first_n + 3 + 1
+        _min_for_compress = self._protect_head_size(messages) + 3 + 1
         if n_messages <= _min_for_compress:
             if not self.quiet_mode:
                 logger.warning(
@@ -1399,7 +1419,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
             logger.info("Pre-compression: pruned %d old tool result(s)", pruned_count)
 
         # Phase 2: Determine boundaries
-        compress_start = self.protect_first_n
+        compress_start = self._protect_head_size(messages)
         compress_start = self._align_boundary_forward(messages, compress_start)
 
         # Use token-budget tail protection instead of fixed message count
diff --git a/cli-config.yaml.example b/cli-config.yaml.example
index 6daceba04a9..1bfec39698a 100644
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@@ -364,6 +364,18 @@ compression:
   # compression of older turns.
   protect_last_n: 20
 
+  # Number of non-system messages to protect at the head of the transcript, in
+  # ADDITION to the system prompt (which is always implicitly protected).
+  # Head messages are NEVER summarized — they survive every compression
+  # indefinitely. This gives stable early context for short/medium sessions,
+  # but in long-running sessions that rely on rolling compaction the pinned
+  # opening turns may not match how you want the session framed over time.
+  # Set to 0 to preserve ONLY the system prompt (plus the rolling summary
+  # and recent tail) — the cleanest configuration for long-running sessions.
+  # Default 2 preserves the system prompt plus the first user/assistant
+  # exchange (≈ 3 messages total when a system prompt is present).
+  protect_first_n: 2
+
   # To pin a specific model/provider for compression summaries, use the
   # auxiliary section below (auxiliary.compression.provider / model).
 
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index fd9784d7847..3feb2cbddbb 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -731,8 +731,13 @@ DEFAULT_CONFIG = {
         "target_ratio": 0.20,         # fraction of threshold to preserve as recent tail
         "protect_last_n": 20,         # minimum recent messages to keep uncompressed
         "hygiene_hard_message_limit": 400,  # gateway session-hygiene force-compress threshold by message count
+        "protect_first_n": 2,         # non-system head messages always preserved
+                                      # verbatim, in ADDITION to the system prompt
+                                      # (which is always implicitly protected). Set to
+                                      # 0 for long-running rolling-compaction sessions
+                                      # where you want nothing pinned except the
+                                      # system prompt + rolling summary + recent tail.
     },
-
     # Anthropic prompt caching (Claude via OpenRouter or native Anthropic API).
     # cache_ttl must be "5m" or "1h" (Anthropic-supported tiers); other values are ignored.
     "prompt_caching": {
@@ -4862,6 +4867,7 @@ def show_config():
         print(f"  Threshold:    {compression.get('threshold', 0.50) * 100:.0f}%")
         print(f"  Target ratio: {compression.get('target_ratio', 0.20) * 100:.0f}% of threshold preserved")
         print(f"  Protect last: {compression.get('protect_last_n', 20)} messages")
+        print(f"  Protect first: {compression.get('protect_first_n', 2)} non-system head messages")
         _aux_comp = config.get('auxiliary', {}).get('compression', {})
         _sm = _aux_comp.get('model', '') or '(auto)'
         print(f"  Model:        {_sm}")
diff --git a/run_agent.py b/run_agent.py
index 53177931b81..8c7dfe2b061 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -2115,6 +2115,15 @@ class AIAgent:
         compression_enabled = str(_compression_cfg.get("enabled", True)).lower() in {"true", "1", "yes"}
         compression_target_ratio = float(_compression_cfg.get("target_ratio", 0.20))
         compression_protect_last = int(_compression_cfg.get("protect_last_n", 20))
+        # protect_first_n is the number of non-system messages to protect at
+        # the head, in addition to the system prompt (which is always
+        # implicitly protected by the compressor).  Floor at 0 — a value of
+        # 0 means "preserve only the system prompt + summary + tail", which
+        # is a legitimate (and common) configuration for long-running
+        # rolling-compaction sessions.
+        compression_protect_first = max(
+            0, int(_compression_cfg.get("protect_first_n", 2))
+        )
 
         # Read optional explicit context_length override for the auxiliary
         # compression model. Custom endpoints often cannot report this via
@@ -2315,7 +2324,7 @@ class AIAgent:
             self.context_compressor = ContextCompressor(
                 model=self.model,
                 threshold_percent=compression_threshold,
-                protect_first_n=3,
+                protect_first_n=compression_protect_first,
                 protect_last_n=compression_protect_last,
                 summary_target_ratio=compression_target_ratio,
                 summary_model_override=None,
diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py
index 97a7c7b3d0f..821d3c4c4b7 100644
--- a/tests/agent/test_context_compressor.py
+++ b/tests/agent/test_context_compressor.py
@@ -991,9 +991,12 @@ class TestCompressWithClient:
         mock_client.chat.completions.create.return_value = mock_response
 
         with patch("agent.context_compressor.get_model_context_length", return_value=100000):
-            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=3, protect_last_n=2)
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
 
         # Last head message (index 2) is "user" → summary should be "assistant"
+        # NOTE: protect_first_n=2 preserves 2 non-system messages in addition to
+        # the system prompt (always implicitly protected), yielding head [system,
+        # user, user] with last head = user.
         msgs = [
             {"role": "system", "content": "system prompt"},
             {"role": "user", "content": "msg 1"},
@@ -1059,11 +1062,13 @@ class TestCompressWithClient:
         mock_response.choices[0].message.content = "summary text"
 
         with patch("agent.context_compressor.get_model_context_length", return_value=100000):
-            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=3, protect_last_n=3)
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=3)
 
         # Head: [system, user, assistant]  →  last head = assistant
         # Tail: [user, assistant, user]    →  first tail = user
         # summary_role="user" collides with tail, "assistant" collides with head → merge
+        # NOTE: protect_first_n=2 preserves 2 non-system messages in addition to
+        # the system prompt (always implicitly protected).
         msgs = [
             {"role": "system", "content": "system prompt"},
             {"role": "user", "content": "msg 1"},
@@ -1097,7 +1102,7 @@ class TestCompressWithClient:
         mock_response.choices[0].message.content = "summary text"
 
         with patch("agent.context_compressor.get_model_context_length", return_value=100000):
-            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=3, protect_last_n=3)
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=3)
 
         msgs = [
             {"role": "system", "content": "system prompt"},
@@ -1133,13 +1138,15 @@ class TestCompressWithClient:
         mock_response.choices[0].message.content = "summary text"
 
         with patch("agent.context_compressor.get_model_context_length", return_value=100000):
-            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=1, protect_last_n=2)
 
         # Head: [system, user]        → last head = user
         # Tail: [assistant, user, assistant] → first tail = assistant
         # summary_role="assistant" collides with tail, "user" collides with head → merge
+        # NOTE: protect_first_n=1 preserves 1 non-system message in addition to
+        # the system prompt (always implicitly protected).
         # With min_tail=3, tail = last 3 messages (indices 5-7).
-        # Need 8 messages: min_for_compress = 2+3+1 = 6, must have > 6.
+        # Need 8 messages: _min_for_compress = head(2) + 3 + 1 = 6, must have > 6.
         msgs = [
             {"role": "system", "content": "system prompt"},
             {"role": "user", "content": "msg 1"},
@@ -1292,6 +1299,90 @@ class TestSummaryTargetRatio:
             c = ContextCompressor(model="test", quiet_mode=True)
         assert c.protect_last_n == 20
 
+    def test_default_protect_first_n_is_2(self):
+        """A compressor built without protect_first_n defaults to 2: with a
+        system prompt present that is 3 protected head messages in total, the
+        same head size the previously hardcoded value of 3 produced."""
+        ctx_len = "agent.context_compressor.get_model_context_length"
+        with patch(ctx_len, return_value=100_000):
+            compressor = ContextCompressor(model="test", quiet_mode=True)
+        assert compressor.protect_first_n == 2
+
+    def test_protect_first_n_override(self):
+        """An explicit protect_first_n=0 is stored as-is (not swapped for the
+        default), so nothing but the system prompt is pinned at the head."""
+        ctx_len = "agent.context_compressor.get_model_context_length"
+        with patch(ctx_len, return_value=100_000):
+            compressor = ContextCompressor(model="test", quiet_mode=True, protect_first_n=0)
+        assert compressor.protect_first_n == 0
+
+    def test_protect_first_n_0_preserves_only_system_prompt(self):
+        """End-to-end check of compress() with protect_first_n=0.
+
+        Only the system prompt counts as head, so the opening user and
+        assistant turns are eligible for summarisation instead of being
+        kept verbatim on every compaction.  That makes 0 the setting of
+        choice for long-running sessions that rely on rolling compaction.
+        """
+        ctx_len = "agent.context_compressor.get_model_context_length"
+        with patch(ctx_len, return_value=100_000):
+            compressor = ContextCompressor(
+                model="test", quiet_mode=True, protect_first_n=0, protect_last_n=2
+            )
+        # System prompt followed by eight alternating turns, user first.
+        transcript = [{"role": "system", "content": "System prompt"}]
+        for turn in range(8):
+            speaker = "assistant" if turn % 2 else "user"
+            transcript.append({"role": speaker, "content": f"msg {turn}"})
+
+        compressed = compressor.compress(transcript)
+
+        # The system prompt stays at the front of the compressed transcript.
+        leading = compressed[0]
+        assert leading["role"] == "system"
+        assert leading["content"].startswith("System prompt")
+        # With nothing pinned beyond the system prompt, the slot right after
+        # it must not be the literal first user turn ("msg 0").
+        assert compressed[1].get("content") != "msg 0"
+        # The most recent message survives verbatim at the end (tail).
+        final_turn = transcript[-1]
+        assert compressed[-1]["content"] == final_turn["content"]
+
+    def test_protect_first_n_semantics_stable_without_system_prompt(self):
+        """Regression: protect_first_n counts non-system head messages in
+        every call path.
+
+        The gateway /compress handler strips the system prompt before
+        calling compress(), while other call paths include it at index 0.
+        Under the old semantics protect_first_n counted literally from
+        messages[0], so in the gateway path protect_first_n=1 pinned the
+        first user turn of the session forever (a user reported a week-old
+        resolved question resurfacing in every compaction summary).  Now a
+        leading system prompt adds exactly one protected slot on top of
+        protect_first_n, and nothing else changes between the two paths.
+        """
+        ctx_len = "agent.context_compressor.get_model_context_length"
+        with patch(ctx_len, return_value=100_000):
+            c = ContextCompressor(
+                model="test", quiet_mode=True, protect_first_n=0, protect_last_n=2
+            )
+        # Gateway shape: no system prompt in the list handed to compress().
+        msgs = [
+            {"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"}
+            for i in range(10)
+        ]
+        # Same transcript with the system prompt still at index 0.
+        with_system = [{"role": "system", "content": "System prompt"}] + msgs
+
+        # protect_first_n=0: empty head without a system prompt (the first
+        # user turn is not pinned); only the system prompt with one.
+        assert c._protect_head_size(msgs) == 0
+        assert c._protect_head_size(with_system) == 1
+        # protect_first_n=3: three earliest non-system messages either way.
+        c.protect_first_n = 3
+        assert c._protect_head_size(msgs) == 3
+        assert c._protect_head_size(with_system) == 4
+
 
 class TestTokenBudgetTailProtection:
     """Tests for token-budget-based tail protection (PR #6240).