feat(prompt): make context-file truncation limit configurable

PROBLEM: Automatic context files such as SOUL.md and AGENTS.md were capped by a hardcoded CONTEXT_FILE_MAX_CHARS value. Amy's local fork had raised that constant from 20K to 25K so a larger SOUL.md would not be silently truncated, but the hardcoded 25K value changed upstream default behavior and made the patch less generally useful.

SOLUTION: Restore the upstream-compatible 20K default, add a context_file_max_chars config setting for users who intentionally keep larger identity/project-context files, keep chat-visible truncation warnings, and document the new setting. Tests cover the default, config override, explicit max_chars precedence, and the warning text.
This commit is contained in:
Wolfram Ravenwolf 2026-04-11 03:34:08 +02:00 committed by Teknium
parent 17251e865b
commit f6a42b1acf
8 changed files with 126 additions and 10 deletions

View file

@ -958,6 +958,34 @@ CONTEXT_TRUNCATE_HEAD_RATIO = 0.7
CONTEXT_TRUNCATE_TAIL_RATIO = 0.2
def _get_context_file_max_chars() -> int:
    """Return the configured context-file truncation limit.

    ``CONTEXT_FILE_MAX_CHARS`` remains the upstream-compatible default and
    fallback. Users with larger context windows can raise
    ``context_file_max_chars`` in config.yaml without patching Hermes.

    Returns:
        The ``context_file_max_chars`` config value truncated to an ``int``
        when it is a real (non-boolean) number of at least 1; otherwise
        ``CONTEXT_FILE_MAX_CHARS``. Any failure while importing or reading
        the config is logged at debug level and also yields the fallback.
    """
    try:
        # Imported lazily, inside the try, so an unavailable or broken
        # config module degrades to the default instead of raising.
        from hermes_cli.config import load_config

        val = load_config().get("context_file_max_chars")
        # bool is a subclass of int: without the explicit exclusion a stray
        # ``true`` in the config would be accepted as a 1-character limit.
        if isinstance(val, (int, float)) and not isinstance(val, bool):
            # int() raises for inf/nan; the handler below turns that into
            # the default.
            limit = int(val)
            # Check the truncated value, not the raw one: 0 < val < 1
            # would otherwise produce a limit of 0.
            if limit > 0:
                return limit
    except Exception as e:
        logger.debug("Could not read context_file_max_chars from config: %s", e)
    return CONTEXT_FILE_MAX_CHARS
# Truncation warnings queue up here until the caller (run_agent) drains and
# surfaces them.
_truncation_warnings: list = []


def drain_truncation_warnings() -> list:
    """Hand back every truncation warning queued since the previous drain.

    The module-level queue is emptied in place, and the returned list is an
    independent snapshot of what it held.
    """
    pending = list(_truncation_warnings)
    del _truncation_warnings[:]
    return pending
# =========================================================================
# Skills prompt cache
# =========================================================================
@ -1463,10 +1491,19 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
# Context files (SOUL.md, AGENTS.md, .cursorrules)
# =========================================================================
def _truncate_content(content: str, filename: str, max_chars: int = CONTEXT_FILE_MAX_CHARS) -> str:
def _truncate_content(content: str, filename: str, max_chars: Optional[int] = None) -> str:
"""Head/tail truncation with a marker in the middle."""
if max_chars is None:
max_chars = _get_context_file_max_chars()
if len(content) <= max_chars:
return content
msg = (
f"⚠️ Context file {filename} TRUNCATED: "
f"{len(content)} chars exceeds limit of {max_chars} — "
f"increase context_file_max_chars or trim the file!"
)
logger.warning(msg)
_truncation_warnings.append(msg)
head_chars = int(max_chars * CONTEXT_TRUNCATE_HEAD_RATIO)
tail_chars = int(max_chars * CONTEXT_TRUNCATE_TAIL_RATIO)
head = content[:head_chars]

View file

@ -40,6 +40,7 @@ from agent.prompt_builder import (
TASK_COMPLETION_GUIDANCE,
TOOL_USE_ENFORCEMENT_GUIDANCE,
TOOL_USE_ENFORCEMENT_MODELS,
drain_truncation_warnings,
)
from agent.runtime_cwd import resolve_context_cwd
@ -400,7 +401,14 @@ def build_system_prompt(agent: Any, system_message: Optional[str] = None) -> str
warm across turns.
"""
parts = build_system_prompt_parts(agent, system_message=system_message)
return "\n\n".join(p for p in (parts["stable"], parts["context"], parts["volatile"]) if p)
joined = "\n\n".join(p for p in (parts["stable"], parts["context"], parts["volatile"]) if p)
# Surface context-file truncation warnings through the normal agent status
# channel so gateway/CLI users see them in chat instead of only in logs.
for warning in drain_truncation_warnings():
agent._emit_status(warning)
return joined
def invalidate_system_prompt(agent: Any) -> None:

View file

@ -1104,6 +1104,11 @@ DEFAULT_CONFIG = {
"min_interval_hours": 24,
},
# Maximum characters loaded from a single automatic context file such as
# SOUL.md, AGENTS.md, CLAUDE.md, .hermes.md, or .cursorrules before Hermes
# applies head/tail truncation. This is separate from read_file tool limits.
"context_file_max_chars": 20_000,
# Maximum characters returned by a single read_file call. Reads that
# exceed this are rejected with guidance to use offset+limit.
# 100K chars ≈ 25–35K tokens across typical tokenisers.

View file

@ -20,6 +20,7 @@ from agent.prompt_builder import (
build_context_files_prompt,
CONTEXT_FILE_MAX_CHARS,
DEFAULT_AGENT_IDENTITY,
drain_truncation_warnings,
TOOL_USE_ENFORCEMENT_GUIDANCE,
TOOL_USE_ENFORCEMENT_MODELS,
OPENAI_MODEL_EXECUTION_GUIDANCE,
@ -113,6 +114,18 @@ class TestScanContextContent:
class TestTruncateContent:
@pytest.fixture(autouse=True)
def _reset_truncation_state(self, monkeypatch):
    """Give each test a clean slate.

    Discards any warnings still queued and replaces
    ``hermes_cli.config.load_config`` with a stub that returns an empty
    config.
    """
    drain_truncation_warnings()
    monkeypatch.setattr("hermes_cli.config.load_config", lambda: {})
def test_context_file_max_chars_default_matches_upstream_limit(self):
    """Pin the module-level default limit at 20,000 characters."""
    assert CONTEXT_FILE_MAX_CHARS == 20_000
def test_short_content_unchanged(self):
content = "Short content"
result = _truncate_content(content, "test.md")
@ -138,6 +151,45 @@ class TestTruncateContent:
result = _truncate_content(content, "exact.md")
assert result == content
def test_configured_context_file_max_chars_controls_truncation(self, monkeypatch):
    """A configured limit of 120 truncates a 168-char file to 84 head + 24 tail chars."""
    monkeypatch.setattr(
        "hermes_cli.config.load_config",
        lambda: {"context_file_max_chars": 120},
    )
    oversized = "HEAD" + "x" * 160 + "TAIL"

    truncated = _truncate_content(oversized, "config.md")

    assert truncated != oversized
    # The marker names the file and the kept sizes; both ends of the
    # original survive.
    for fragment in ("truncated config.md", "kept 84+24", "HEAD", "TAIL"):
        assert fragment in truncated
def test_explicit_max_chars_overrides_config(self, monkeypatch):
    """An explicit ``max_chars`` argument wins over the configured limit."""
    monkeypatch.setattr(
        "hermes_cli.config.load_config",
        lambda: {"context_file_max_chars": 120},
    )
    payload = "x" * 180

    # 180 chars is over the configured 120 but within the explicit 200, so
    # the content must come back untouched.
    assert _truncate_content(payload, "explicit.md", max_chars=200) == payload
def test_truncation_warning_points_to_config_key(self, monkeypatch):
    """The queued warning names the config key, not the module constant."""
    monkeypatch.setattr(
        "hermes_cli.config.load_config",
        lambda: {"context_file_max_chars": 120},
    )

    _truncate_content("x" * 180, "warning.md")
    queued = drain_truncation_warnings()

    assert len(queued) == 1
    message = queued[0]
    assert "context_file_max_chars" in message
    assert "CONTEXT_FILE_MAX_CHARS" not in message
# =========================================================================
# _parse_skill_file — single-pass skill file reading

View file

@ -128,7 +128,7 @@ def load_soul_md() -> Optional[str]:
return None
content = soul_path.read_text(encoding="utf-8").strip()
content = _scan_context_content(content, "SOUL.md") # Security scan
content = _truncate_content(content, "SOUL.md") # Cap at 20k chars
content = _truncate_content(content, "SOUL.md") # Cap defaults to 20k chars, configurable
return content
```
@ -195,7 +195,7 @@ def build_context_files_prompt(cwd=None, skip_soul=False):
All context files are:
- **Security scanned** — checked for prompt injection patterns (invisible unicode, "ignore previous instructions", credential exfiltration attempts)
- **Truncated** — capped at 20,000 characters using 70/20 head/tail ratio with a truncation marker
- **Truncated** — capped at `context_file_max_chars` characters (default 20,000) using 70/20 head/tail ratio with a truncation marker
- **YAML frontmatter stripped** — `.hermes.md` frontmatter is removed (reserved for future config overrides)
## API-call-time-only layers

View file

@ -606,6 +606,20 @@ memory:
With `memory.write_approval: true`, memory writes need your approval before they land: interactive CLI turns prompt inline; messaging sessions and the background self-improvement review stage the write for `/memory pending` → `/memory approve <id>` / `/memory reject <id>` review. Toggle at runtime with `/memory approval on|off`. See [Controlling memory writes](/user-guide/features/memory#controlling-memory-writes-write_approval).
## Context File Truncation
Controls how much content Hermes loads from each automatic context file before applying head/tail truncation. This applies to files injected into the system prompt such as `SOUL.md`, `.hermes.md`, `AGENTS.md`, `CLAUDE.md`, and `.cursorrules`. It does **not** affect the `read_file` tool.
```yaml
context_file_max_chars: 20000 # default
```
Raise it when you intentionally keep larger identity or project-context files and run models with enough context window to carry them:
```yaml
context_file_max_chars: 25000
```
## File Read Safety
Controls how much content a single `read_file` call can return. Reads that exceed the limit are rejected with an error telling the agent to use `offset` and `limit` for a smaller range. This prevents a single read of a minified JS bundle or large data file from flooding the context window.
@ -1839,7 +1853,7 @@ Hermes uses two different context scopes:
- **Project context files use a priority system** — only ONE type is loaded (first match wins): `.hermes.md` → `AGENTS.md` → `CLAUDE.md` → `.cursorrules`. SOUL.md is always loaded independently.
- **AGENTS.md** is hierarchical: if subdirectories also have AGENTS.md, all are combined.
- Hermes automatically seeds a default `SOUL.md` if one does not already exist.
- All loaded context files are capped at 20,000 characters with smart truncation.
- All loaded context files are capped at `context_file_max_chars` characters (default 20,000) with smart truncation.
See also:
- [Personality & SOUL.md](/user-guide/features/personality)

View file

@ -109,7 +109,7 @@ Context files are loaded by `build_context_files_prompt()` in `agent/prompt_buil
1. **Scan working directory** — checks for `.hermes.md` → `AGENTS.md` → `CLAUDE.md` → `.cursorrules` (first match wins)
2. **Content is read** — each file is read as UTF-8 text
3. **Security scan** — content is checked for prompt injection patterns
4. **Truncation** — files exceeding 20,000 characters are head/tail truncated (70% head, 20% tail, with a marker in the middle)
4. **Truncation** — files exceeding `context_file_max_chars` characters (default 20,000) are head/tail truncated (70% head, 20% tail, with a marker in the middle)
5. **Assembly** — all sections are combined under a `# Project Context` header
6. **Injection** — the assembled content is added to the system prompt
@ -171,12 +171,12 @@ This scanner protects against common injection patterns, but it's not a substitu
| Limit | Value |
|-------|-------|
| Max chars per file | 20,000 (~7,000 tokens) |
| Max chars per file | `context_file_max_chars` (default 20,000, ~7,000 tokens) |
| Head truncation ratio | 70% |
| Tail truncation ratio | 20% |
| Truncation marker | 10% (shows char counts and suggests using file tools) |
When a file exceeds 20,000 characters, the truncation message reads:
When a file exceeds the configured limit, the truncation message reads:
```
[...truncated AGENTS.md: kept 14000+4000 of 25000 chars. Use file tools to read the full file.]
@ -185,7 +185,7 @@ When a file exceeds 20,000 characters, the truncation message reads:
## Tips for Effective Context Files
:::tip Best practices for AGENTS.md
1. **Keep it concise** — stay well under 20K chars; the agent reads it every turn
1. **Keep it concise** — stay under your configured `context_file_max_chars`; the agent reads it every turn
2. **Structure with headers** — use `##` sections for architecture, conventions, important notes
3. **Include concrete examples** — show preferred code patterns, API shapes, naming conventions
4. **Mention what NOT to do** — "never modify migration files directly"

View file

@ -128,7 +128,7 @@ def load_soul_md() -> Optional[str]:
return None
content = soul_path.read_text(encoding="utf-8").strip()
content = _scan_context_content(content, "SOUL.md") # Security scan
content = _truncate_content(content, "SOUL.md") # Cap at 20k chars
content = _truncate_content(content, "SOUL.md") # Cap defaults to 20k chars, configurable
return content
```