mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-21 10:22:18 +00:00
feat(prompt): make context-file truncation limit configurable
PROBLEM: Automatic context files such as SOUL.md and AGENTS.md were capped by a hardcoded CONTEXT_FILE_MAX_CHARS value. Amy's local fork had raised that constant from 20K to 25K so a larger SOUL.md would not be silently truncated, but the hardcoded 25K value changed upstream default behavior and made the patch less generally useful. SOLUTION: Restore the upstream-compatible 20K default, add a context_file_max_chars config setting for users who intentionally keep larger identity/project-context files, keep chat-visible truncation warnings, and document the new setting. Tests cover the default, config override, explicit max_chars precedence, and the warning text.
This commit is contained in:
parent
17251e865b
commit
f6a42b1acf
8 changed files with 126 additions and 10 deletions
|
|
@ -958,6 +958,34 @@ CONTEXT_TRUNCATE_HEAD_RATIO = 0.7
|
|||
CONTEXT_TRUNCATE_TAIL_RATIO = 0.2
|
||||
|
||||
|
||||
def _get_context_file_max_chars() -> int:
    """Return the configured context-file truncation limit.

    ``CONTEXT_FILE_MAX_CHARS`` remains the upstream-compatible default and
    fallback. Users with larger context windows can raise
    ``context_file_max_chars`` in config.yaml without patching Hermes.

    Returns:
        The configured value converted to ``int`` when it is a non-boolean
        number whose integer part is positive; otherwise
        ``CONTEXT_FILE_MAX_CHARS``.
    """
    try:
        from hermes_cli.config import load_config

        val = load_config().get("context_file_max_chars")
        # bool is a subclass of int, so ``context_file_max_chars: true`` would
        # otherwise pass the numeric check and yield a 1-character limit.
        if isinstance(val, (int, float)) and not isinstance(val, bool):
            # Validate the converted value: a fraction such as 0.5 is > 0 but
            # converts to 0, which would truncate every file to nothing.
            # int() raises for nan/inf; that falls through to the handler below.
            limit = int(val)
            if limit > 0:
                return limit
    except Exception as e:
        # Best-effort lookup: any config problem falls back to the default.
        logger.debug("Could not read context_file_max_chars from config: %s", e)
    return CONTEXT_FILE_MAX_CHARS
|
||||
|
||||
# Collect truncation warnings so the caller (run_agent) can surface them.
|
||||
_truncation_warnings: list = []
|
||||
|
||||
|
||||
def drain_truncation_warnings() -> list:
    """Hand back all pending truncation warnings and empty the buffer."""
    pending = list(_truncation_warnings)
    # Empty the shared list in place so the module-level name keeps
    # referring to the same list object.
    del _truncation_warnings[:]
    return pending
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Skills prompt cache
|
||||
# =========================================================================
|
||||
|
|
@ -1463,10 +1491,19 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
|
|||
# Context files (SOUL.md, AGENTS.md, .cursorrules)
|
||||
# =========================================================================
|
||||
|
||||
def _truncate_content(content: str, filename: str, max_chars: int = CONTEXT_FILE_MAX_CHARS) -> str:
|
||||
def _truncate_content(content: str, filename: str, max_chars: Optional[int] = None) -> str:
|
||||
"""Head/tail truncation with a marker in the middle."""
|
||||
if max_chars is None:
|
||||
max_chars = _get_context_file_max_chars()
|
||||
if len(content) <= max_chars:
|
||||
return content
|
||||
msg = (
|
||||
f"⚠️ Context file {filename} TRUNCATED: "
|
||||
f"{len(content)} chars exceeds limit of {max_chars} — "
|
||||
f"increase context_file_max_chars or trim the file!"
|
||||
)
|
||||
logger.warning(msg)
|
||||
_truncation_warnings.append(msg)
|
||||
head_chars = int(max_chars * CONTEXT_TRUNCATE_HEAD_RATIO)
|
||||
tail_chars = int(max_chars * CONTEXT_TRUNCATE_TAIL_RATIO)
|
||||
head = content[:head_chars]
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ from agent.prompt_builder import (
|
|||
TASK_COMPLETION_GUIDANCE,
|
||||
TOOL_USE_ENFORCEMENT_GUIDANCE,
|
||||
TOOL_USE_ENFORCEMENT_MODELS,
|
||||
drain_truncation_warnings,
|
||||
)
|
||||
from agent.runtime_cwd import resolve_context_cwd
|
||||
|
||||
|
|
@ -400,7 +401,14 @@ def build_system_prompt(agent: Any, system_message: Optional[str] = None) -> str
|
|||
warm across turns.
|
||||
"""
|
||||
parts = build_system_prompt_parts(agent, system_message=system_message)
|
||||
return "\n\n".join(p for p in (parts["stable"], parts["context"], parts["volatile"]) if p)
|
||||
joined = "\n\n".join(p for p in (parts["stable"], parts["context"], parts["volatile"]) if p)
|
||||
|
||||
# Surface context-file truncation warnings through the normal agent status
|
||||
# channel so gateway/CLI users see them in chat instead of only in logs.
|
||||
for warning in drain_truncation_warnings():
|
||||
agent._emit_status(warning)
|
||||
|
||||
return joined
|
||||
|
||||
|
||||
def invalidate_system_prompt(agent: Any) -> None:
|
||||
|
|
|
|||
|
|
@ -1104,6 +1104,11 @@ DEFAULT_CONFIG = {
|
|||
"min_interval_hours": 24,
|
||||
},
|
||||
|
||||
# Maximum characters loaded from a single automatic context file such as
|
||||
# SOUL.md, AGENTS.md, CLAUDE.md, .hermes.md, or .cursorrules before Hermes
|
||||
# applies head/tail truncation. This is separate from read_file tool limits.
|
||||
"context_file_max_chars": 20_000,
|
||||
|
||||
# Maximum characters returned by a single read_file call. Reads that
|
||||
# exceed this are rejected with guidance to use offset+limit.
|
||||
# 100K chars ≈ 25–35K tokens across typical tokenisers.
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ from agent.prompt_builder import (
|
|||
build_context_files_prompt,
|
||||
CONTEXT_FILE_MAX_CHARS,
|
||||
DEFAULT_AGENT_IDENTITY,
|
||||
drain_truncation_warnings,
|
||||
TOOL_USE_ENFORCEMENT_GUIDANCE,
|
||||
TOOL_USE_ENFORCEMENT_MODELS,
|
||||
OPENAI_MODEL_EXECUTION_GUIDANCE,
|
||||
|
|
@ -113,6 +114,18 @@ class TestScanContextContent:
|
|||
|
||||
|
||||
class TestTruncateContent:
|
||||
@pytest.fixture(autouse=True)
def _reset_truncation_state(self, monkeypatch):
    """Start every test with no pending warnings and an empty config."""
    # Discard warnings accumulated before this test runs.
    drain_truncation_warnings()
    # An empty config carries no context_file_max_chars override.
    monkeypatch.setattr("hermes_cli.config.load_config", lambda: {})
|
||||
|
||||
def test_context_file_max_chars_default_matches_upstream_limit(self):
    """The built-in fallback limit stays at 20,000 characters."""
    upstream_default = 20_000
    assert CONTEXT_FILE_MAX_CHARS == upstream_default
|
||||
|
||||
def test_short_content_unchanged(self):
|
||||
content = "Short content"
|
||||
result = _truncate_content(content, "test.md")
|
||||
|
|
@ -138,6 +151,45 @@ class TestTruncateContent:
|
|||
result = _truncate_content(content, "exact.md")
|
||||
assert result == content
|
||||
|
||||
def test_configured_context_file_max_chars_controls_truncation(self, monkeypatch):
    """A limit read from config applies when max_chars is not passed."""
    monkeypatch.setattr(
        "hermes_cli.config.load_config",
        lambda: {"context_file_max_chars": 120},
    )
    original = "".join(("HEAD", "x" * 160, "TAIL"))

    truncated = _truncate_content(original, "config.md")

    assert truncated != original
    # With a 120-char limit the 0.7 / 0.2 ratios keep 84 head + 24 tail chars.
    for expected in ("truncated config.md", "kept 84+24", "HEAD", "TAIL"):
        assert expected in truncated
|
||||
|
||||
def test_explicit_max_chars_overrides_config(self, monkeypatch):
    """An explicit max_chars argument takes precedence over the config value."""
    monkeypatch.setattr(
        "hermes_cli.config.load_config",
        lambda: {"context_file_max_chars": 120},
    )
    # 180 chars exceeds the configured 120 but fits the explicit 200.
    original = "x" * 180

    untouched = _truncate_content(original, "explicit.md", max_chars=200)

    assert untouched == original
|
||||
|
||||
def test_truncation_warning_points_to_config_key(self, monkeypatch):
    """The warning names the config key, not the module constant."""
    monkeypatch.setattr(
        "hermes_cli.config.load_config",
        lambda: {"context_file_max_chars": 120},
    )

    _truncate_content("x" * 180, "warning.md")

    emitted = drain_truncation_warnings()
    assert len(emitted) == 1
    (message,) = emitted
    assert "context_file_max_chars" in message
    assert "CONTEXT_FILE_MAX_CHARS" not in message
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# _parse_skill_file — single-pass skill file reading
|
||||
|
|
|
|||
|
|
@ -128,7 +128,7 @@ def load_soul_md() -> Optional[str]:
|
|||
return None
|
||||
content = soul_path.read_text(encoding="utf-8").strip()
|
||||
content = _scan_context_content(content, "SOUL.md") # Security scan
|
||||
content = _truncate_content(content, "SOUL.md") # Cap at 20k chars
|
||||
content = _truncate_content(content, "SOUL.md") # Cap defaults to 20k chars, configurable
|
||||
return content
|
||||
```
|
||||
|
||||
|
|
@ -195,7 +195,7 @@ def build_context_files_prompt(cwd=None, skip_soul=False):
|
|||
|
||||
All context files are:
|
||||
- **Security scanned** — checked for prompt injection patterns (invisible unicode, "ignore previous instructions", credential exfiltration attempts)
|
||||
- **Truncated** — capped at 20,000 characters using 70/20 head/tail ratio with a truncation marker
|
||||
- **Truncated** — capped at `context_file_max_chars` characters (default 20,000) using 70/20 head/tail ratio with a truncation marker
|
||||
- **YAML frontmatter stripped** — `.hermes.md` frontmatter is removed (reserved for future config overrides)
|
||||
|
||||
## API-call-time-only layers
|
||||
|
|
|
|||
|
|
@ -606,6 +606,20 @@ memory:
|
|||
|
||||
With `memory.write_approval: true`, memory writes need your approval before they land: interactive CLI turns prompt inline; messaging sessions and the background self-improvement review stage the write for `/memory pending` → `/memory approve <id>` / `/memory reject <id>` review. Toggle at runtime with `/memory approval on|off`. See [Controlling memory writes](/user-guide/features/memory#controlling-memory-writes-write_approval).
|
||||
|
||||
## Context File Truncation
|
||||
|
||||
Sets the maximum size, in characters, of each automatic context file; a file longer than this limit is head/tail truncated before it is added to the prompt. This applies to files injected into the system prompt such as `SOUL.md`, `.hermes.md`, `AGENTS.md`, `CLAUDE.md`, and `.cursorrules`. It does **not** affect the `read_file` tool. A value that is not a positive number is ignored and the default is used.
|
||||
|
||||
```yaml
|
||||
context_file_max_chars: 20000 # default
|
||||
```
|
||||
|
||||
Raise it when you intentionally keep larger identity or project-context files and run models with enough context window to carry them:
|
||||
|
||||
```yaml
|
||||
context_file_max_chars: 25000
|
||||
```
|
||||
|
||||
## File Read Safety
|
||||
|
||||
Controls how much content a single `read_file` call can return. Reads that exceed the limit are rejected with an error telling the agent to use `offset` and `limit` for a smaller range. This prevents a single read of a minified JS bundle or large data file from flooding the context window.
|
||||
|
|
@ -1839,7 +1853,7 @@ Hermes uses two different context scopes:
|
|||
- **Project context files use a priority system** — only ONE type is loaded (first match wins): `.hermes.md` → `AGENTS.md` → `CLAUDE.md` → `.cursorrules`. SOUL.md is always loaded independently.
|
||||
- **AGENTS.md** is hierarchical: if subdirectories also have AGENTS.md, all are combined.
|
||||
- Hermes automatically seeds a default `SOUL.md` if one does not already exist.
|
||||
- All loaded context files are capped at 20,000 characters with smart truncation.
|
||||
- All loaded context files are capped at `context_file_max_chars` characters (default 20,000) with smart truncation.
|
||||
|
||||
See also:
|
||||
- [Personality & SOUL.md](/user-guide/features/personality)
|
||||
|
|
|
|||
|
|
@ -109,7 +109,7 @@ Context files are loaded by `build_context_files_prompt()` in `agent/prompt_buil
|
|||
1. **Scan working directory** — checks for `.hermes.md` → `AGENTS.md` → `CLAUDE.md` → `.cursorrules` (first match wins)
|
||||
2. **Content is read** — each file is read as UTF-8 text
|
||||
3. **Security scan** — content is checked for prompt injection patterns
|
||||
4. **Truncation** — files exceeding 20,000 characters are head/tail truncated (70% head, 20% tail, with a marker in the middle)
|
||||
4. **Truncation** — files exceeding `context_file_max_chars` characters (default 20,000) are head/tail truncated (70% head, 20% tail, with a marker in the middle)
|
||||
5. **Assembly** — all sections are combined under a `# Project Context` header
|
||||
6. **Injection** — the assembled content is added to the system prompt
|
||||
|
||||
|
|
@ -171,12 +171,12 @@ This scanner protects against common injection patterns, but it's not a substitu
|
|||
|
||||
| Limit | Value |
|
||||
|-------|-------|
|
||||
| Max chars per file | 20,000 (~7,000 tokens) |
|
||||
| Max chars per file | `context_file_max_chars` (default 20,000, ~7,000 tokens) |
|
||||
| Head truncation ratio | 70% |
|
||||
| Tail truncation ratio | 20% |
|
||||
| Truncation marker | 10% (shows char counts and suggests using file tools) |
|
||||
|
||||
When a file exceeds 20,000 characters, the truncation message reads:
|
||||
When a file exceeds the configured limit, the truncation message reads:
|
||||
|
||||
```
|
||||
[...truncated AGENTS.md: kept 14000+4000 of 25000 chars. Use file tools to read the full file.]
|
||||
|
|
@ -185,7 +185,7 @@ When a file exceeds 20,000 characters, the truncation message reads:
|
|||
## Tips for Effective Context Files
|
||||
|
||||
:::tip Best practices for AGENTS.md
|
||||
1. **Keep it concise** — stay well under 20K chars; the agent reads it every turn
|
||||
1. **Keep it concise** — stay under your configured `context_file_max_chars`; the agent reads it every turn
|
||||
2. **Structure with headers** — use `##` sections for architecture, conventions, important notes
|
||||
3. **Include concrete examples** — show preferred code patterns, API shapes, naming conventions
|
||||
4. **Mention what NOT to do** — "never modify migration files directly"
|
||||
|
|
|
|||
|
|
@ -128,7 +128,7 @@ def load_soul_md() -> Optional[str]:
|
|||
return None
|
||||
content = soul_path.read_text(encoding="utf-8").strip()
|
||||
content = _scan_context_content(content, "SOUL.md") # Security scan
|
||||
content = _truncate_content(content, "SOUL.md") # Cap at 20k chars
|
||||
content = _truncate_content(content, "SOUL.md") # Cap defaults to 20k chars, configurable
|
||||
return content
|
||||
```
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue