From f6a42b1acf23a476f84f4b6adf78c62580cc4ac1 Mon Sep 17 00:00:00 2001 From: Wolfram Ravenwolf Date: Sat, 11 Apr 2026 03:34:08 +0200 Subject: [PATCH] feat(prompt): make context-file truncation limit configurable PROBLEM: Automatic context files such as SOUL.md and AGENTS.md were capped by a hardcoded CONTEXT_FILE_MAX_CHARS value. Amy's local fork had raised that constant from 20K to 25K so a larger SOUL.md would not be silently truncated, but the hardcoded 25K value changed upstream default behavior and made the patch less generally useful. SOLUTION: Restore the upstream-compatible 20K default, add a context_file_max_chars config setting for users who intentionally keep larger identity/project-context files, keep chat-visible truncation warnings, and document the new setting. Tests cover the default, config override, explicit max_chars precedence, and the warning text. --- agent/prompt_builder.py | 39 +++++++++++++- agent/system_prompt.py | 10 +++- hermes_cli/config.py | 5 ++ tests/agent/test_prompt_builder.py | 52 +++++++++++++++++++ .../docs/developer-guide/prompt-assembly.md | 4 +- website/docs/user-guide/configuration.md | 16 +++++- .../docs/user-guide/features/context-files.md | 8 +-- .../developer-guide/prompt-assembly.md | 2 +- 8 files changed, 126 insertions(+), 10 deletions(-) diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index b11cade39bd..e095857545b 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -958,6 +958,34 @@ CONTEXT_TRUNCATE_HEAD_RATIO = 0.7 CONTEXT_TRUNCATE_TAIL_RATIO = 0.2 +def _get_context_file_max_chars() -> int: + """Return the configured context-file truncation limit. + + ``CONTEXT_FILE_MAX_CHARS`` remains the upstream-compatible default and + fallback. Users with larger context windows can raise + ``context_file_max_chars`` in config.yaml without patching Hermes. + """ + try: + from hermes_cli.config import load_config + + val = load_config().get("context_file_max_chars") + if isinstance(val, (int, float)) and val > 0: + return int(val) + except Exception as e: + logger.debug("Could not read context_file_max_chars from config: %s", e) + return CONTEXT_FILE_MAX_CHARS + +# Collect truncation warnings so the caller (run_agent) can surface them. +_truncation_warnings: list = [] + + +def drain_truncation_warnings() -> list: + """Return and clear any truncation warnings accumulated since last drain.""" + warnings = _truncation_warnings.copy() + _truncation_warnings.clear() + return warnings + + # ========================================================================= # Skills prompt cache # ========================================================================= @@ -1463,10 +1491,19 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) - # Context files (SOUL.md, AGENTS.md, .cursorrules) # ========================================================================= -def _truncate_content(content: str, filename: str, max_chars: int = CONTEXT_FILE_MAX_CHARS) -> str: +def _truncate_content(content: str, filename: str, max_chars: Optional[int] = None) -> str: """Head/tail truncation with a marker in the middle.""" + if max_chars is None: + max_chars = _get_context_file_max_chars() if len(content) <= max_chars: return content + msg = ( + f"⚠️ Context file {filename} TRUNCATED: " + f"{len(content)} chars exceeds limit of {max_chars} — " + f"increase context_file_max_chars or trim the file!" + ) + logger.warning(msg) + _truncation_warnings.append(msg) head_chars = int(max_chars * CONTEXT_TRUNCATE_HEAD_RATIO) tail_chars = int(max_chars * CONTEXT_TRUNCATE_TAIL_RATIO) head = content[:head_chars] diff --git a/agent/system_prompt.py b/agent/system_prompt.py index 76f57dfcdbc..9c0e1424245 100644 --- a/agent/system_prompt.py +++ b/agent/system_prompt.py @@ -40,6 +40,7 @@ from agent.prompt_builder import ( TASK_COMPLETION_GUIDANCE, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, + drain_truncation_warnings, ) from agent.runtime_cwd import resolve_context_cwd @@ -400,7 +401,14 @@ def build_system_prompt(agent: Any, system_message: Optional[str] = None) -> str warm across turns. """ parts = build_system_prompt_parts(agent, system_message=system_message) - return "\n\n".join(p for p in (parts["stable"], parts["context"], parts["volatile"]) if p) + joined = "\n\n".join(p for p in (parts["stable"], parts["context"], parts["volatile"]) if p) + + # Surface context-file truncation warnings through the normal agent status + # channel so gateway/CLI users see them in chat instead of only in logs. + for warning in drain_truncation_warnings(): + agent._emit_status(warning) + + return joined def invalidate_system_prompt(agent: Any) -> None: diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 4f801e2e9b4..2c17717c86b 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -1104,6 +1104,11 @@ DEFAULT_CONFIG = { "min_interval_hours": 24, }, + # Maximum characters loaded from a single automatic context file such as + # SOUL.md, AGENTS.md, CLAUDE.md, .hermes.md, or .cursorrules before Hermes + # applies head/tail truncation. This is separate from read_file tool limits. + "context_file_max_chars": 20_000, + # Maximum characters returned by a single read_file call. Reads that # exceed this are rejected with guidance to use offset+limit. # 100K chars ≈ 25–35K tokens across typical tokenisers. diff --git a/tests/agent/test_prompt_builder.py b/tests/agent/test_prompt_builder.py index e6c302fdb92..0fc727f2af5 100644 --- a/tests/agent/test_prompt_builder.py +++ b/tests/agent/test_prompt_builder.py @@ -20,6 +20,7 @@ from agent.prompt_builder import ( build_context_files_prompt, CONTEXT_FILE_MAX_CHARS, DEFAULT_AGENT_IDENTITY, + drain_truncation_warnings, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, OPENAI_MODEL_EXECUTION_GUIDANCE, @@ -113,6 +114,18 @@ class TestScanContextContent: class TestTruncateContent: + @pytest.fixture(autouse=True) + def _reset_truncation_state(self, monkeypatch): + drain_truncation_warnings() + + def default_load_config(): + return {} + + monkeypatch.setattr("hermes_cli.config.load_config", default_load_config) + + def test_context_file_max_chars_default_matches_upstream_limit(self): + assert CONTEXT_FILE_MAX_CHARS == 20_000 + def test_short_content_unchanged(self): content = "Short content" result = _truncate_content(content, "test.md") @@ -138,6 +151,45 @@ class TestTruncateContent: result = _truncate_content(content, "exact.md") assert result == content + def test_configured_context_file_max_chars_controls_truncation(self, monkeypatch): + def fake_load_config(): + return {"context_file_max_chars": 120} + + monkeypatch.setattr("hermes_cli.config.load_config", fake_load_config) + content = "HEAD" + "x" * 160 + "TAIL" + + result = _truncate_content(content, "config.md") + + assert result != content + assert "truncated config.md" in result + assert "kept 84+24" in result + assert "HEAD" in result + assert "TAIL" in result + + def test_explicit_max_chars_overrides_config(self, monkeypatch): + def fake_load_config(): + return {"context_file_max_chars": 120} + + monkeypatch.setattr("hermes_cli.config.load_config", fake_load_config) + content = "x" * 180 + + result = _truncate_content(content, "explicit.md", max_chars=200) + + assert result == content + + def test_truncation_warning_points_to_config_key(self, monkeypatch): + def fake_load_config(): + return {"context_file_max_chars": 120} + + monkeypatch.setattr("hermes_cli.config.load_config", fake_load_config) + + _truncate_content("x" * 180, "warning.md") + + warnings = drain_truncation_warnings() + assert len(warnings) == 1 + assert "context_file_max_chars" in warnings[0] + assert "CONTEXT_FILE_MAX_CHARS" not in warnings[0] + # ========================================================================= # _parse_skill_file — single-pass skill file reading diff --git a/website/docs/developer-guide/prompt-assembly.md b/website/docs/developer-guide/prompt-assembly.md index d4b31027e2f..d255c4a2e93 100644 --- a/website/docs/developer-guide/prompt-assembly.md +++ b/website/docs/developer-guide/prompt-assembly.md @@ -128,7 +128,7 @@ def load_soul_md() -> Optional[str]: return None content = soul_path.read_text(encoding="utf-8").strip() content = _scan_context_content(content, "SOUL.md") # Security scan - content = _truncate_content(content, "SOUL.md") # Cap at 20k chars + content = _truncate_content(content, "SOUL.md") # Cap defaults to 20k chars, configurable return content ``` @@ -195,7 +195,7 @@ def build_context_files_prompt(cwd=None, skip_soul=False): All context files are: - **Security scanned** — checked for prompt injection patterns (invisible unicode, "ignore previous instructions", credential exfiltration attempts) -- **Truncated** — capped at 20,000 characters using 70/20 head/tail ratio with a truncation marker +- **Truncated** — capped at `context_file_max_chars` characters (default 20,000) using 70/20 head/tail ratio with a truncation marker - **YAML frontmatter stripped** — `.hermes.md` frontmatter is removed (reserved for future config overrides) ## API-call-time-only layers diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index e22d143ce30..307ec5a2e45 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -606,6 +606,20 @@ memory: With `memory.write_approval: true`, memory writes need your approval before they land: interactive CLI turns prompt inline; messaging sessions and the background self-improvement review stage the write for `/memory pending` → `/memory approve ` / `/memory reject ` review. Toggle at runtime with `/memory approval on|off`. See [Controlling memory writes](/user-guide/features/memory#controlling-memory-writes-write_approval). +## Context File Truncation + +Controls how much content Hermes loads from each automatic context file before applying head/tail truncation. This applies to files injected into the system prompt such as `SOUL.md`, `.hermes.md`, `AGENTS.md`, `CLAUDE.md`, and `.cursorrules`. It does **not** affect the `read_file` tool. + +```yaml +context_file_max_chars: 20000 # default +``` + +Raise it when you intentionally keep larger identity or project-context files and run models with enough context window to carry them: + +```yaml +context_file_max_chars: 25000 +``` + ## File Read Safety Controls how much content a single `read_file` call can return. Reads that exceed the limit are rejected with an error telling the agent to use `offset` and `limit` for a smaller range. This prevents a single read of a minified JS bundle or large data file from flooding the context window. @@ -1839,7 +1853,7 @@ Hermes uses two different context scopes: - **Project context files use a priority system** — only ONE type is loaded (first match wins): `.hermes.md` → `AGENTS.md` → `CLAUDE.md` → `.cursorrules`. SOUL.md is always loaded independently. - **AGENTS.md** is hierarchical: if subdirectories also have AGENTS.md, all are combined. - Hermes automatically seeds a default `SOUL.md` if one does not already exist. -- All loaded context files are capped at 20,000 characters with smart truncation. +- All loaded context files are capped at `context_file_max_chars` characters (default 20,000) with smart truncation. See also: - [Personality & SOUL.md](/user-guide/features/personality) diff --git a/website/docs/user-guide/features/context-files.md b/website/docs/user-guide/features/context-files.md index 86766e69f07..195201439f2 100644 --- a/website/docs/user-guide/features/context-files.md +++ b/website/docs/user-guide/features/context-files.md @@ -109,7 +109,7 @@ Context files are loaded by `build_context_files_prompt()` in `agent/prompt_buil 1. **Scan working directory** — checks for `.hermes.md` → `AGENTS.md` → `CLAUDE.md` → `.cursorrules` (first match wins) 2. **Content is read** — each file is read as UTF-8 text 3. **Security scan** — content is checked for prompt injection patterns -4. **Truncation** — files exceeding 20,000 characters are head/tail truncated (70% head, 20% tail, with a marker in the middle) +4. **Truncation** — files exceeding `context_file_max_chars` characters (default 20,000) are head/tail truncated (70% head, 20% tail, with a marker in the middle) 5. **Assembly** — all sections are combined under a `# Project Context` header 6. **Injection** — the assembled content is added to the system prompt @@ -171,12 +171,12 @@ This scanner protects against common injection patterns, but it's not a substitu | Limit | Value | |-------|-------| -| Max chars per file | 20,000 (~7,000 tokens) | +| Max chars per file | `context_file_max_chars` (default 20,000, ~7,000 tokens) | | Head truncation ratio | 70% | | Tail truncation ratio | 20% | | Truncation marker | 10% (shows char counts and suggests using file tools) | -When a file exceeds 20,000 characters, the truncation message reads: +When a file exceeds the configured limit, the truncation message reads: ``` [...truncated AGENTS.md: kept 14000+4000 of 25000 chars. Use file tools to read the full file.] @@ -185,7 +185,7 @@ When a file exceeds 20,000 characters, the truncation message reads: ## Tips for Effective Context Files :::tip Best practices for AGENTS.md -1. **Keep it concise** — stay well under 20K chars; the agent reads it every turn +1. **Keep it concise** — stay under your configured `context_file_max_chars`; the agent reads it every turn 2. **Structure with headers** — use `##` sections for architecture, conventions, important notes 3. **Include concrete examples** — show preferred code patterns, API shapes, naming conventions 4. **Mention what NOT to do** — "never modify migration files directly" diff --git a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/developer-guide/prompt-assembly.md b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/developer-guide/prompt-assembly.md index 84e7ddbf6bf..28c474c21cd 100644 --- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/developer-guide/prompt-assembly.md +++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/developer-guide/prompt-assembly.md @@ -128,7 +128,7 @@ def load_soul_md() -> Optional[str]: return None content = soul_path.read_text(encoding="utf-8").strip() content = _scan_context_content(content, "SOUL.md") # Security scan - content = _truncate_content(content, "SOUL.md") # Cap at 20k chars + content = _truncate_content(content, "SOUL.md") # Cap defaults to 20k chars, configurable return content ```