From f6a42b1acf23a476f84f4b6adf78c62580cc4ac1 Mon Sep 17 00:00:00 2001
From: Wolfram Ravenwolf <github.com@wolfram.ravenwolf.de>
Date: Sat, 11 Apr 2026 03:34:08 +0200
Subject: [PATCH] feat(prompt): make context-file truncation limit configurable

PROBLEM: Automatic context files such as SOUL.md and AGENTS.md were capped by a hardcoded CONTEXT_FILE_MAX_CHARS value. Amy's local fork had raised that constant from 20K to 25K so a larger SOUL.md would not be silently truncated, but the hardcoded 25K value changed upstream default behavior and made the patch less generally useful.

SOLUTION: Restore the upstream-compatible 20K default, add a context_file_max_chars config setting for users who intentionally keep larger identity/project-context files, keep chat-visible truncation warnings, and document the new setting. Tests cover the default, config override, explicit max_chars precedence, and the warning text.
---
 agent/prompt_builder.py                       | 39 +++++++++++++-
 agent/system_prompt.py                        | 10 +++-
 hermes_cli/config.py                          |  5 ++
 tests/agent/test_prompt_builder.py            | 52 +++++++++++++++++++
 .../docs/developer-guide/prompt-assembly.md   |  4 +-
 website/docs/user-guide/configuration.md      | 16 +++++-
 .../docs/user-guide/features/context-files.md |  8 +--
 .../developer-guide/prompt-assembly.md        |  2 +-
 8 files changed, 126 insertions(+), 10 deletions(-)

diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py
index b11cade39bd..e095857545b 100644
--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@@ -958,6 +958,34 @@ CONTEXT_TRUNCATE_HEAD_RATIO = 0.7
 CONTEXT_TRUNCATE_TAIL_RATIO = 0.2
 
 
+def _get_context_file_max_chars() -> int:
+    """Return the configured context-file truncation limit.
+
+    Reads ``context_file_max_chars`` from config.yaml. A value that is not a
+    real number >= 1 (bools, fractions that would floor to 0), or any error
+    while reading it, falls back to the default ``CONTEXT_FILE_MAX_CHARS``.
+    """
+    try:
+        from hermes_cli.config import load_config
+
+        val = load_config().get("context_file_max_chars")
+        if isinstance(val, (int, float)) and not isinstance(val, bool) and val >= 1:
+            return int(val)
+    except Exception as e:
+        logger.debug("Could not read context_file_max_chars from config: %s", e)
+    return CONTEXT_FILE_MAX_CHARS
+
+# Truncation warnings queued here until build_system_prompt() drains and emits them.
+_truncation_warnings: list = []
+
+
+def drain_truncation_warnings() -> list:
+    """Hand back every queued truncation warning and leave the queue empty."""
+    drained = list(_truncation_warnings)
+    del _truncation_warnings[:]
+    return drained
+
+
 # =========================================================================
 # Skills prompt cache
 # =========================================================================
@@ -1463,10 +1491,19 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
 # Context files (SOUL.md, AGENTS.md, .cursorrules)
 # =========================================================================
 
-def _truncate_content(content: str, filename: str, max_chars: int = CONTEXT_FILE_MAX_CHARS) -> str:
+def _truncate_content(content: str, filename: str, max_chars: Optional[int] = None) -> str:
     """Head/tail truncation with a marker in the middle."""
+    if max_chars is None:
+        max_chars = _get_context_file_max_chars()
     if len(content) <= max_chars:
         return content
+    msg = (
+        f"⚠️  Context file {filename} TRUNCATED: "
+        f"{len(content)} chars exceeds limit of {max_chars} — "
+        f"increase context_file_max_chars or trim the file!"
+    )
+    logger.warning(msg)
+    _truncation_warnings.append(msg)
     head_chars = int(max_chars * CONTEXT_TRUNCATE_HEAD_RATIO)
     tail_chars = int(max_chars * CONTEXT_TRUNCATE_TAIL_RATIO)
     head = content[:head_chars]
diff --git a/agent/system_prompt.py b/agent/system_prompt.py
index 76f57dfcdbc..9c0e1424245 100644
--- a/agent/system_prompt.py
+++ b/agent/system_prompt.py
@@ -40,6 +40,7 @@ from agent.prompt_builder import (
     TASK_COMPLETION_GUIDANCE,
     TOOL_USE_ENFORCEMENT_GUIDANCE,
     TOOL_USE_ENFORCEMENT_MODELS,
+    drain_truncation_warnings,
 )
 from agent.runtime_cwd import resolve_context_cwd
 
@@ -400,7 +401,14 @@ def build_system_prompt(agent: Any, system_message: Optional[str] = None) -> str
     warm across turns.
     """
     parts = build_system_prompt_parts(agent, system_message=system_message)
-    return "\n\n".join(p for p in (parts["stable"], parts["context"], parts["volatile"]) if p)
+    joined = "\n\n".join(p for p in (parts["stable"], parts["context"], parts["volatile"]) if p)
+
+    # Surface context-file truncation warnings through the normal agent status
+    # channel so gateway/CLI users see them in chat instead of only in logs.
+    for warning in drain_truncation_warnings():
+        agent._emit_status(warning)
+
+    return joined
 
 
 def invalidate_system_prompt(agent: Any) -> None:
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 4f801e2e9b4..2c17717c86b 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -1104,6 +1104,11 @@ DEFAULT_CONFIG = {
         "min_interval_hours": 24,
     },
 
+    # Maximum characters loaded from a single automatic context file such as
+    # SOUL.md, AGENTS.md, CLAUDE.md, .hermes.md, or .cursorrules before Hermes
+    # applies head/tail truncation. This is separate from read_file tool limits.
+    "context_file_max_chars": 20_000,
+
     # Maximum characters returned by a single read_file call.  Reads that
     # exceed this are rejected with guidance to use offset+limit.
     # 100K chars ≈ 25–35K tokens across typical tokenisers.
diff --git a/tests/agent/test_prompt_builder.py b/tests/agent/test_prompt_builder.py
index e6c302fdb92..0fc727f2af5 100644
--- a/tests/agent/test_prompt_builder.py
+++ b/tests/agent/test_prompt_builder.py
@@ -20,6 +20,7 @@ from agent.prompt_builder import (
     build_context_files_prompt,
     CONTEXT_FILE_MAX_CHARS,
     DEFAULT_AGENT_IDENTITY,
+    drain_truncation_warnings,
     TOOL_USE_ENFORCEMENT_GUIDANCE,
     TOOL_USE_ENFORCEMENT_MODELS,
     OPENAI_MODEL_EXECUTION_GUIDANCE,
@@ -113,6 +114,18 @@ class TestScanContextContent:
 
 
 class TestTruncateContent:
+    @pytest.fixture(autouse=True)
+    def _reset_truncation_state(self, monkeypatch):
+        drain_truncation_warnings()
+        # Stub config as empty so tests use the CONTEXT_FILE_MAX_CHARS fallback.
+        def default_load_config():
+            return {}
+
+        monkeypatch.setattr("hermes_cli.config.load_config", default_load_config)
+
+    def test_context_file_max_chars_default_matches_upstream_limit(self):
+        assert CONTEXT_FILE_MAX_CHARS == 20_000
+
     def test_short_content_unchanged(self):
         content = "Short content"
         result = _truncate_content(content, "test.md")
@@ -138,6 +151,45 @@ class TestTruncateContent:
         result = _truncate_content(content, "exact.md")
         assert result == content
 
+    def test_configured_context_file_max_chars_controls_truncation(self, monkeypatch):
+        def fake_load_config():
+            return {"context_file_max_chars": 120}
+
+        monkeypatch.setattr("hermes_cli.config.load_config", fake_load_config)
+        content = "HEAD" + "x" * 160 + "TAIL"
+        # 168 chars > 120: expect int(120 * 0.7) = 84 head + int(120 * 0.2) = 24 tail.
+        result = _truncate_content(content, "config.md")
+
+        assert result != content
+        assert "truncated config.md" in result
+        assert "kept 84+24" in result
+        assert "HEAD" in result
+        assert "TAIL" in result
+
+    def test_explicit_max_chars_overrides_config(self, monkeypatch):
+        def fake_load_config():
+            return {"context_file_max_chars": 120}
+
+        monkeypatch.setattr("hermes_cli.config.load_config", fake_load_config)
+        content = "x" * 180
+        # 180 chars exceeds the configured 120 but fits the explicit 200.
+        result = _truncate_content(content, "explicit.md", max_chars=200)
+
+        assert result == content
+
+    def test_truncation_warning_points_to_config_key(self, monkeypatch):
+        def fake_load_config():
+            return {"context_file_max_chars": 120}
+
+        monkeypatch.setattr("hermes_cli.config.load_config", fake_load_config)
+        # 180 chars against a 120-char limit queues exactly one warning.
+        _truncate_content("x" * 180, "warning.md")
+
+        warnings = drain_truncation_warnings()
+        assert len(warnings) == 1
+        assert "context_file_max_chars" in warnings[0]
+        assert "CONTEXT_FILE_MAX_CHARS" not in warnings[0]
+
 
 # =========================================================================
 # _parse_skill_file — single-pass skill file reading
diff --git a/website/docs/developer-guide/prompt-assembly.md b/website/docs/developer-guide/prompt-assembly.md
index d4b31027e2f..d255c4a2e93 100644
--- a/website/docs/developer-guide/prompt-assembly.md
+++ b/website/docs/developer-guide/prompt-assembly.md
@@ -128,7 +128,7 @@ def load_soul_md() -> Optional[str]:
         return None
     content = soul_path.read_text(encoding="utf-8").strip()
     content = _scan_context_content(content, "SOUL.md")  # Security scan
-    content = _truncate_content(content, "SOUL.md")       # Cap at 20k chars
+    content = _truncate_content(content, "SOUL.md")       # Cap defaults to 20k chars, configurable
     return content
 ```
 
@@ -195,7 +195,7 @@ def build_context_files_prompt(cwd=None, skip_soul=False):
 
 All context files are:
 - **Security scanned** — checked for prompt injection patterns (invisible unicode, "ignore previous instructions", credential exfiltration attempts)
-- **Truncated** — capped at 20,000 characters using 70/20 head/tail ratio with a truncation marker
+- **Truncated** — capped at `context_file_max_chars` characters (default 20,000) using 70/20 head/tail ratio with a truncation marker
 - **YAML frontmatter stripped** — `.hermes.md` frontmatter is removed (reserved for future config overrides)
 
 ## API-call-time-only layers
diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md
index e22d143ce30..307ec5a2e45 100644
--- a/website/docs/user-guide/configuration.md
+++ b/website/docs/user-guide/configuration.md
@@ -606,6 +606,20 @@ memory:
 
 With `memory.write_approval: true`, memory writes need your approval before they land: interactive CLI turns prompt inline; messaging sessions and the background self-improvement review stage the write for `/memory pending` → `/memory approve <id>` / `/memory reject <id>` review. Toggle at runtime with `/memory approval on|off`. See [Controlling memory writes](/user-guide/features/memory#controlling-memory-writes-write_approval).
 
+## Context File Truncation
+
+Controls how much content Hermes loads from each automatic context file before applying head/tail truncation. This applies to files injected into the system prompt such as `SOUL.md`, `.hermes.md`, `AGENTS.md`, `CLAUDE.md`, and `.cursorrules`. It does **not** affect the `read_file` tool.
+
+```yaml
+context_file_max_chars: 20000  # default
+```
+
+Raise it when you intentionally keep larger identity or project-context files and run models with enough context window to carry them:
+
+```yaml
+context_file_max_chars: 25000
+```
+
 ## File Read Safety
 
 Controls how much content a single `read_file` call can return. Reads that exceed the limit are rejected with an error telling the agent to use `offset` and `limit` for a smaller range. This prevents a single read of a minified JS bundle or large data file from flooding the context window.
@@ -1839,7 +1853,7 @@ Hermes uses two different context scopes:
 - **Project context files use a priority system** — only ONE type is loaded (first match wins): `.hermes.md` → `AGENTS.md` → `CLAUDE.md` → `.cursorrules`. SOUL.md is always loaded independently.
 - **AGENTS.md** is hierarchical: if subdirectories also have AGENTS.md, all are combined.
 - Hermes automatically seeds a default `SOUL.md` if one does not already exist.
-- All loaded context files are capped at 20,000 characters with smart truncation.
+- All loaded context files are capped at `context_file_max_chars` characters (default 20,000) with smart truncation.
 
 See also:
 - [Personality & SOUL.md](/user-guide/features/personality)
diff --git a/website/docs/user-guide/features/context-files.md b/website/docs/user-guide/features/context-files.md
index 86766e69f07..195201439f2 100644
--- a/website/docs/user-guide/features/context-files.md
+++ b/website/docs/user-guide/features/context-files.md
@@ -109,7 +109,7 @@ Context files are loaded by `build_context_files_prompt()` in `agent/prompt_buil
 1. **Scan working directory** — checks for `.hermes.md` → `AGENTS.md` → `CLAUDE.md` → `.cursorrules` (first match wins)
 2. **Content is read** — each file is read as UTF-8 text
 3. **Security scan** — content is checked for prompt injection patterns
-4. **Truncation** — files exceeding 20,000 characters are head/tail truncated (70% head, 20% tail, with a marker in the middle)
+4. **Truncation** — files exceeding `context_file_max_chars` characters (default 20,000) are head/tail truncated (70% head, 20% tail, with a marker in the middle)
 5. **Assembly** — all sections are combined under a `# Project Context` header
 6. **Injection** — the assembled content is added to the system prompt
 
@@ -171,12 +171,12 @@ This scanner protects against common injection patterns, but it's not a substitu
 
 | Limit | Value |
 |-------|-------|
-| Max chars per file | 20,000 (~7,000 tokens) |
+| Max chars per file | `context_file_max_chars` (default 20,000, ~7,000 tokens) |
 | Head truncation ratio | 70% |
 | Tail truncation ratio | 20% |
 | Truncation marker | 10% (shows char counts and suggests using file tools) |
 
-When a file exceeds 20,000 characters, the truncation message reads:
+When a file exceeds the configured limit, the truncation message reads:
 
 ```
 [...truncated AGENTS.md: kept 14000+4000 of 25000 chars. Use file tools to read the full file.]
@@ -185,7 +185,7 @@ When a file exceeds 20,000 characters, the truncation message reads:
 ## Tips for Effective Context Files
 
 :::tip Best practices for AGENTS.md
-1. **Keep it concise** — stay well under 20K chars; the agent reads it every turn
+1. **Keep it concise** — stay well under your configured `context_file_max_chars` limit; the agent reads it every turn
 2. **Structure with headers** — use `##` sections for architecture, conventions, important notes
 3. **Include concrete examples** — show preferred code patterns, API shapes, naming conventions
 4. **Mention what NOT to do** — "never modify migration files directly"
diff --git a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/developer-guide/prompt-assembly.md b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/developer-guide/prompt-assembly.md
index 84e7ddbf6bf..28c474c21cd 100644
--- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/developer-guide/prompt-assembly.md
+++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/developer-guide/prompt-assembly.md
@@ -128,7 +128,7 @@ def load_soul_md() -> Optional[str]:
         return None
     content = soul_path.read_text(encoding="utf-8").strip()
     content = _scan_context_content(content, "SOUL.md")  # Security scan
-    content = _truncate_content(content, "SOUL.md")       # Cap at 20k chars
+    content = _truncate_content(content, "SOUL.md")       # Cap defaults to 20k chars, configurable
     return content
 ```