mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
perf: fix O(n²) catastrophic backtracking in redact regex + reorder file read guard
Two pre-existing issues causing test_file_read_guards timeouts on CI:
1. agent/redact.py: _ENV_ASSIGN_RE used an unbounded [A-Z_]* with
IGNORECASE, so from each start position it could match any run of
letters/underscores up to end-of-string → O(n²) backtracking on 100K+ char inputs.
Bounded to {0,50}, since env var names are never that long.
2. tools/file_tools.py: redact_sensitive_text() ran BEFORE the
character-count guard, so oversized content (that would be rejected
anyway) went through the expensive regex first. Reordered to check
size limit before redaction.
This commit is contained in:
parent
1c0c5d957f
commit
831067c5d3
2 changed files with 7 additions and 3 deletions
|
|
@@ -345,8 +345,6 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
|
|||
# ── Perform the read ──────────────────────────────────────────
|
||||
file_ops = _get_file_ops(task_id)
|
||||
result = file_ops.read_file(path, offset, limit)
|
||||
if result.content:
|
||||
result.content = redact_sensitive_text(result.content)
|
||||
result_dict = result.to_dict()
|
||||
|
||||
# ── Character-count guard ─────────────────────────────────────
|
||||
|
|
@@ -355,6 +353,7 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
|
|||
# amount of content, reject it and tell the model to narrow down.
|
||||
# Note: we check the formatted content (with line-number prefixes),
|
||||
# not the raw file size, because that's what actually enters context.
|
||||
# Check BEFORE redaction to avoid expensive regex on huge content.
|
||||
content_len = len(result.content or "")
|
||||
file_size = result_dict.get("file_size", 0)
|
||||
max_chars = _get_max_read_chars()
|
||||
|
|
@@ -372,6 +371,11 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
|
|||
"file_size": file_size,
|
||||
}, ensure_ascii=False)
|
||||
|
||||
# ── Redact secrets (after guard check to skip oversized content) ──
|
||||
if result.content:
|
||||
result.content = redact_sensitive_text(result.content)
|
||||
result_dict["content"] = result.content
|
||||
|
||||
# Large-file hint: if the file is big and the caller didn't ask
|
||||
# for a narrow window, nudge toward targeted reads.
|
||||
if (file_size and file_size > _LARGE_FILE_HINT_BYTES
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue