feat(prompt): make context-file truncation limit configurable

PROBLEM: Automatic context files such as SOUL.md and AGENTS.md were capped by a hardcoded CONTEXT_FILE_MAX_CHARS value. Amy's local fork had raised that constant from 20K to 25K so a larger SOUL.md would not be silently truncated, but the hardcoded 25K value changed upstream default behavior and made the patch less generally useful. SOLUTION: Restore the upstream-compatible 20K default, add a context_file_max_chars config setting for users who intentionally keep larger identity/project-context files, keep chat-visible truncation warnings, and document the new setting. Tests cover the default, config override, explicit max_chars precedence, and the warning text.
2026-06-21 10:22:18 +00:00 · 2026-04-11 03:34:08 +02:00 · 2026-04-11 03:34:08 +02:00 · f6a42b1acf
commit f6a42b1acf
parent 17251e865b
8 changed files with 126 additions and 10 deletions
--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@ -958,6 +958,34 @@ CONTEXT_TRUNCATE_HEAD_RATIO = 0.7
 CONTEXT_TRUNCATE_TAIL_RATIO = 0.2


+def _get_context_file_max_chars() -> int:
+    """Return the configured context-file truncation limit.
+
+    ``CONTEXT_FILE_MAX_CHARS`` remains the upstream-compatible default and
+    fallback. Users with larger context windows can raise
+    ``context_file_max_chars`` in config.yaml without patching Hermes.
+    """
+    try:
+        from hermes_cli.config import load_config
+
+        val = load_config().get("context_file_max_chars")
+        if isinstance(val, (int, float)) and val > 0:
+            return int(val)
+    except Exception as e:
+        logger.debug("Could not read context_file_max_chars from config: %s", e)
+    return CONTEXT_FILE_MAX_CHARS
+
+# Collect truncation warnings so the caller (run_agent) can surface them.
+_truncation_warnings: list = []
+
+
+def drain_truncation_warnings() -> list:
+    """Return and clear any truncation warnings accumulated since last drain."""
+    warnings = _truncation_warnings.copy()
+    _truncation_warnings.clear()
+    return warnings
+
+
 # =========================================================================
 # Skills prompt cache
 # =========================================================================
@ -1463,10 +1491,19 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
 # Context files (SOUL.md, AGENTS.md, .cursorrules)
 # =========================================================================

-def _truncate_content(content: str, filename: str, max_chars: int = CONTEXT_FILE_MAX_CHARS) -> str:
+def _truncate_content(content: str, filename: str, max_chars: Optional[int] = None) -> str:
    """Head/tail truncation with a marker in the middle."""
+    if max_chars is None:
+        max_chars = _get_context_file_max_chars()
    if len(content) <= max_chars:
        return content
+    msg = (
+        f"⚠️  Context file {filename} TRUNCATED: "
+        f"{len(content)} chars exceeds limit of {max_chars} — "
+        f"increase context_file_max_chars or trim the file!"
+    )
+    logger.warning(msg)
+    _truncation_warnings.append(msg)
    head_chars = int(max_chars * CONTEXT_TRUNCATE_HEAD_RATIO)
    tail_chars = int(max_chars * CONTEXT_TRUNCATE_TAIL_RATIO)
    head = content[:head_chars]
--- a/agent/system_prompt.py
+++ b/agent/system_prompt.py
@ -40,6 +40,7 @@ from agent.prompt_builder import (
    TASK_COMPLETION_GUIDANCE,
    TOOL_USE_ENFORCEMENT_GUIDANCE,
    TOOL_USE_ENFORCEMENT_MODELS,
+    drain_truncation_warnings,
 )
 from agent.runtime_cwd import resolve_context_cwd

@ -400,7 +401,14 @@ def build_system_prompt(agent: Any, system_message: Optional[str] = None) -> str
    warm across turns.
    """
    parts = build_system_prompt_parts(agent, system_message=system_message)
-    return "\n\n".join(p for p in (parts["stable"], parts["context"], parts["volatile"]) if p)
+    joined = "\n\n".join(p for p in (parts["stable"], parts["context"], parts["volatile"]) if p)
+
+    # Surface context-file truncation warnings through the normal agent status
+    # channel so gateway/CLI users see them in chat instead of only in logs.
+    for warning in drain_truncation_warnings():
+        agent._emit_status(warning)
+
+    return joined


 def invalidate_system_prompt(agent: Any) -> None:
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@ -1104,6 +1104,11 @@ DEFAULT_CONFIG = {
        "min_interval_hours": 24,
    },

+    # Maximum characters loaded from a single automatic context file such as
+    # SOUL.md, AGENTS.md, CLAUDE.md, .hermes.md, or .cursorrules before Hermes
+    # applies head/tail truncation. This is separate from read_file tool limits.
+    "context_file_max_chars": 20_000,
+
    # Maximum characters returned by a single read_file call.  Reads that
    # exceed this are rejected with guidance to use offset+limit.
    # 100K chars ≈ 25–35K tokens across typical tokenisers.
--- a/tests/agent/test_prompt_builder.py
+++ b/tests/agent/test_prompt_builder.py
@ -20,6 +20,7 @@ from agent.prompt_builder import (
    build_context_files_prompt,
    CONTEXT_FILE_MAX_CHARS,
    DEFAULT_AGENT_IDENTITY,
+    drain_truncation_warnings,
    TOOL_USE_ENFORCEMENT_GUIDANCE,
    TOOL_USE_ENFORCEMENT_MODELS,
    OPENAI_MODEL_EXECUTION_GUIDANCE,
@ -113,6 +114,18 @@ class TestScanContextContent:


 class TestTruncateContent:
+    @pytest.fixture(autouse=True)
+    def _reset_truncation_state(self, monkeypatch):
+        drain_truncation_warnings()
+
+        def default_load_config():
+            return {}
+
+        monkeypatch.setattr("hermes_cli.config.load_config", default_load_config)
+
+    def test_context_file_max_chars_default_matches_upstream_limit(self):
+        assert CONTEXT_FILE_MAX_CHARS == 20_000
+
    def test_short_content_unchanged(self):
        content = "Short content"
        result = _truncate_content(content, "test.md")
@ -138,6 +151,45 @@ class TestTruncateContent:
        result = _truncate_content(content, "exact.md")
        assert result == content

+    def test_configured_context_file_max_chars_controls_truncation(self, monkeypatch):
+        def fake_load_config():
+            return {"context_file_max_chars": 120}
+
+        monkeypatch.setattr("hermes_cli.config.load_config", fake_load_config)
+        content = "HEAD" + "x" * 160 + "TAIL"
+
+        result = _truncate_content(content, "config.md")
+
+        assert result != content
+        assert "truncated config.md" in result
+        assert "kept 84+24" in result
+        assert "HEAD" in result
+        assert "TAIL" in result
+
+    def test_explicit_max_chars_overrides_config(self, monkeypatch):
+        def fake_load_config():
+            return {"context_file_max_chars": 120}
+
+        monkeypatch.setattr("hermes_cli.config.load_config", fake_load_config)
+        content = "x" * 180
+
+        result = _truncate_content(content, "explicit.md", max_chars=200)
+
+        assert result == content
+
+    def test_truncation_warning_points_to_config_key(self, monkeypatch):
+        def fake_load_config():
+            return {"context_file_max_chars": 120}
+
+        monkeypatch.setattr("hermes_cli.config.load_config", fake_load_config)
+
+        _truncate_content("x" * 180, "warning.md")
+
+        warnings = drain_truncation_warnings()
+        assert len(warnings) == 1
+        assert "context_file_max_chars" in warnings[0]
+        assert "CONTEXT_FILE_MAX_CHARS" not in warnings[0]
+

 # =========================================================================
 # _parse_skill_file — single-pass skill file reading
--- a/website/docs/developer-guide/prompt-assembly.md
+++ b/website/docs/developer-guide/prompt-assembly.md
@ -128,7 +128,7 @@ def load_soul_md() -> Optional[str]:
        return None
    content = soul_path.read_text(encoding="utf-8").strip()
    content = _scan_context_content(content, "SOUL.md")  # Security scan
-    content = _truncate_content(content, "SOUL.md")       # Cap at 20k chars
+    content = _truncate_content(content, "SOUL.md")       # Cap defaults to 20k chars, configurable
    return content
 ```

@ -195,7 +195,7 @@ def build_context_files_prompt(cwd=None, skip_soul=False):

 All context files are:
 - **Security scanned** — checked for prompt injection patterns (invisible unicode, "ignore previous instructions", credential exfiltration attempts)
- **Truncated** — capped at 20,000 characters using 70/20 head/tail ratio with a truncation marker
+- **Truncated** — capped at `context_file_max_chars` characters (default 20,000) using 70/20 head/tail ratio with a truncation marker
 - **YAML frontmatter stripped** — `.hermes.md` frontmatter is removed (reserved for future config overrides)

 ## API-call-time-only layers
--- a/website/docs/user-guide/configuration.md
+++ b/website/docs/user-guide/configuration.md
@ -606,6 +606,20 @@ memory:

 With `memory.write_approval: true`, memory writes need your approval before they land: interactive CLI turns prompt inline; messaging sessions and the background self-improvement review stage the write for `/memory pending` → `/memory approve <id>` / `/memory reject <id>` review. Toggle at runtime with `/memory approval on|off`. See [Controlling memory writes](/user-guide/features/memory#controlling-memory-writes-write_approval).

+## Context File Truncation
+
+Controls how much content Hermes loads from each automatic context file before applying head/tail truncation. This applies to files injected into the system prompt such as `SOUL.md`, `.hermes.md`, `AGENTS.md`, `CLAUDE.md`, and `.cursorrules`. It does **not** affect the `read_file` tool.
+
+```yaml
+context_file_max_chars: 20000  # default
+```
+
+Raise it when you intentionally keep larger identity or project-context files and run models with enough context window to carry them:
+
+```yaml
+context_file_max_chars: 25000
+```
+
 ## File Read Safety

 Controls how much content a single `read_file` call can return. Reads that exceed the limit are rejected with an error telling the agent to use `offset` and `limit` for a smaller range. This prevents a single read of a minified JS bundle or large data file from flooding the context window.
@ -1839,7 +1853,7 @@ Hermes uses two different context scopes:
 - **Project context files use a priority system** — only ONE type is loaded (first match wins): `.hermes.md` → `AGENTS.md` → `CLAUDE.md` → `.cursorrules`. SOUL.md is always loaded independently.
 - **AGENTS.md** is hierarchical: if subdirectories also have AGENTS.md, all are combined.
 - Hermes automatically seeds a default `SOUL.md` if one does not already exist.
- All loaded context files are capped at 20,000 characters with smart truncation.
+- All loaded context files are capped at `context_file_max_chars` characters (default 20,000) with smart truncation.

 See also:
 - [Personality & SOUL.md](/user-guide/features/personality)
--- a/website/docs/user-guide/features/context-files.md
+++ b/website/docs/user-guide/features/context-files.md
@ -109,7 +109,7 @@ Context files are loaded by `build_context_files_prompt()` in `agent/prompt_buil
 1. **Scan working directory** — checks for `.hermes.md` → `AGENTS.md` → `CLAUDE.md` → `.cursorrules` (first match wins)
 2. **Content is read** — each file is read as UTF-8 text
 3. **Security scan** — content is checked for prompt injection patterns
-4. **Truncation** — files exceeding 20,000 characters are head/tail truncated (70% head, 20% tail, with a marker in the middle)
+4. **Truncation** — files exceeding `context_file_max_chars` characters (default 20,000) are head/tail truncated (70% head, 20% tail, with a marker in the middle)
 5. **Assembly** — all sections are combined under a `# Project Context` header
 6. **Injection** — the assembled content is added to the system prompt

@ -171,12 +171,12 @@ This scanner protects against common injection patterns, but it's not a substitu

 | Limit | Value |
 |-------|-------|
-| Max chars per file | 20,000 (~7,000 tokens) |
+| Max chars per file | `context_file_max_chars` (default 20,000, ~7,000 tokens) |
 | Head truncation ratio | 70% |
 | Tail truncation ratio | 20% |
 | Truncation marker | 10% (shows char counts and suggests using file tools) |

-When a file exceeds 20,000 characters, the truncation message reads:
+When a file exceeds the configured limit, the truncation message reads:

 ```
 [...truncated AGENTS.md: kept 14000+4000 of 25000 chars. Use file tools to read the full file.]
@ -185,7 +185,7 @@ When a file exceeds 20,000 characters, the truncation message reads:
 ## Tips for Effective Context Files

 :::tip Best practices for AGENTS.md
-1. **Keep it concise** — stay well under 20K chars; the agent reads it every turn
+1. **Keep it concise** — stay under your configured `context_file_max_chars`; the agent reads it every turn
 2. **Structure with headers** — use `##` sections for architecture, conventions, important notes
 3. **Include concrete examples** — show preferred code patterns, API shapes, naming conventions
 4. **Mention what NOT to do** — "never modify migration files directly"
--- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/developer-guide/prompt-assembly.md
+++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/developer-guide/prompt-assembly.md
@ -128,7 +128,7 @@ def load_soul_md() -> Optional[str]:
        return None
    content = soul_path.read_text(encoding="utf-8").strip()
    content = _scan_context_content(content, "SOUL.md")  # Security scan
-    content = _truncate_content(content, "SOUL.md")       # Cap at 20k chars
+    content = _truncate_content(content, "SOUL.md")       # Cap defaults to 20k chars, configurable
    return content
 ```