fix(windows): stop spamming cwd-missing + tirith-spawn warnings on every terminal call

Two log-spam fixes surfaced by a Windows user (Git Bash + Python 3.11.9):

1. LocalEnvironment cwd warn spam
   ============================
   Git Bash's `pwd -P` emits paths like `/c/Users/x`. The base-class
   `_extract_cwd_from_output` was assigning this verbatim to `self.cwd`
   without validation, then `_resolve_safe_cwd`'s `os.path.isdir(/c/...)`
   returned False on Windows, triggering:

       LocalEnvironment cwd '/c/Users/NVIDIA' is missing on disk;
       falling back to '/' so terminal commands keep working.

   ...on every terminal call. The pre-existing Windows-path translation
   inside `_run_bash` ran AFTER the safe-cwd check, so it could never
   prevent the warning.

   Fix:
   - New `_msys_to_windows_path` helper (idempotent, no-op off Windows).
   - `_resolve_safe_cwd` normalizes before `isdir`, so a valid MSYS path
     is recognized as the real directory it points at.
   - `LocalEnvironment._update_cwd` and a new override of
     `_extract_cwd_from_output` translate + validate before mutating
     `self.cwd`. Stale / non-existent marker paths roll back to the
     previous cwd instead of clobbering it.
   - The fallback warning still fires when the directory really is gone
     (deletion-recovery scenario from #17558 still covered).

2. tirith spawn-failed warn spam
   =============================
   When tirith isn't installed (background install in flight, or marked
   failed for the day) and the configured path stays as the bare string
   `tirith`, every `subprocess.run([tirith_path, ...])` raises OSError
   and logged:

       tirith spawn failed: [WinError 2] The system cannot find the file specified

   ...on every command. fail_open=True means behaviour is correct, but
   the log noise is severe.

   Fix:
   - `_warn_once(key, ...)` thread-safe dedupe helper.
   - Three hot-path warnings (`tirith path resolved to None`,
     `tirith spawn failed: ...`, `tirith timed out after Ns`) now log
     once per (exception class, errno) / timeout-value / path-none key.
   - Dedupe set is cleared on `_clear_install_failed` so a successful
     install lets a subsequent failure surface again.

Tests
=====
- `tests/tools/test_local_env_windows_msys.py`: 12 tests covering the
  MSYS→Windows translator, the resolve fast-path, update_cwd validation,
  and extract_cwd_from_output rollback.
- `tests/tools/test_tirith_security.py`: 4 new dedupe tests (15 spawn
  failures → 1 log line; distinct exc types → 2 lines; timeout dedupe;
  path-None dedupe).

Targeted runs:
  test_local_env_windows_msys.py      12 passed
  test_local_env_cwd_recovery.py       7 passed (pre-existing, no regressions)
  test_tirith_security.py             67 passed (63 pre-existing + 4 new)
  test_base_environment + local_*    37 passed (no regressions)
  test_local_env_blocklist + neighbours  114 passed

Reported via Hermes log capture: 19× cwd warnings + 15× tirith warnings
in a single short session.
This commit is contained in:
teknium1 2026-05-15 14:58:28 -07:00 committed by Teknium
parent 7fee1f61eb
commit 4aec25bc44
4 changed files with 441 additions and 14 deletions

View file

@ -1007,3 +1007,120 @@ class TestHermesHomeIsolation:
expected = os.path.join(os.path.expanduser("~"), ".hermes")
result = _get_hermes_home()
assert result == expected
# ---------------------------------------------------------------------------
# Warn-once dedupe (issue: tirith spawn failed spamming on Windows)
# ---------------------------------------------------------------------------
class TestSpawnWarningDedup:
"""When tirith isn't installed yet (background install in flight, or
install marked failed), every terminal command spammed an identical
``tirith spawn failed: [WinError 2]`` warning to ``errors.log``. The
dedupe set in ``_warn_once`` collapses repeats by ``(exc class, errno)``
while still surfacing the first occurrence so users see the failure.
"""
@patch("tools.tirith_security.subprocess.run")
@patch("tools.tirith_security._load_security_config")
def test_repeated_spawn_failure_logs_once(self, mock_cfg, mock_run, caplog):
mock_cfg.return_value = {
"tirith_enabled": True, "tirith_path": "tirith",
"tirith_timeout": 5, "tirith_fail_open": True,
}
mock_run.side_effect = FileNotFoundError("[WinError 2]")
# Fresh dedupe state — clear any keys left by other tests.
_tirith_mod._reset_spawn_warning_state()
with caplog.at_level("WARNING", logger="tools.tirith_security"):
for _ in range(15):
result = check_command_security("echo hi")
# Behavior must remain the same on every call —
# fail-open allow, with the exception captured in summary.
assert result["action"] == "allow"
assert "unavailable" in result["summary"]
spawn_warnings = [
rec for rec in caplog.records
if "tirith spawn failed" in rec.message
]
assert len(spawn_warnings) == 1, (
f"expected exactly 1 spawn-failed warning across 15 commands, "
f"got {len(spawn_warnings)}: {[r.message for r in spawn_warnings]}"
)
@patch("tools.tirith_security.subprocess.run")
@patch("tools.tirith_security._load_security_config")
def test_distinct_exception_types_each_log_once(self, mock_cfg, mock_run, caplog):
"""``FileNotFoundError`` and ``PermissionError`` are distinct
failure modes and each deserves its own first-occurrence log
line; the dedupe key includes the exception class."""
mock_cfg.return_value = {
"tirith_enabled": True, "tirith_path": "tirith",
"tirith_timeout": 5, "tirith_fail_open": True,
}
_tirith_mod._reset_spawn_warning_state()
with caplog.at_level("WARNING", logger="tools.tirith_security"):
mock_run.side_effect = FileNotFoundError("[WinError 2]")
for _ in range(3):
check_command_security("a")
mock_run.side_effect = PermissionError("denied")
for _ in range(3):
check_command_security("b")
spawn_warnings = [
rec for rec in caplog.records
if "tirith spawn failed" in rec.message
]
assert len(spawn_warnings) == 2, (
f"expected 2 distinct first-occurrence warnings, "
f"got {len(spawn_warnings)}"
)
@patch("tools.tirith_security.subprocess.run")
@patch("tools.tirith_security._load_security_config")
def test_repeated_timeout_logs_once(self, mock_cfg, mock_run, caplog):
mock_cfg.return_value = {
"tirith_enabled": True, "tirith_path": "tirith",
"tirith_timeout": 5, "tirith_fail_open": True,
}
mock_run.side_effect = subprocess.TimeoutExpired(cmd="tirith", timeout=5)
_tirith_mod._reset_spawn_warning_state()
with caplog.at_level("WARNING", logger="tools.tirith_security"):
for _ in range(10):
result = check_command_security("slow")
assert result["action"] == "allow"
timeout_warnings = [
rec for rec in caplog.records
if "tirith timed out" in rec.message
]
assert len(timeout_warnings) == 1
@patch("tools.tirith_security._load_security_config")
def test_path_none_logs_once(self, mock_cfg, caplog):
"""``_resolve_tirith_path`` returning ``None`` (explicit path set
but resolver returned None unusual) should not spam the log
either."""
mock_cfg.return_value = {
"tirith_enabled": True, "tirith_path": "tirith",
"tirith_timeout": 5, "tirith_fail_open": True,
}
_tirith_mod._reset_spawn_warning_state()
with patch(
"tools.tirith_security._resolve_tirith_path", return_value=None
):
with caplog.at_level("WARNING", logger="tools.tirith_security"):
for _ in range(10):
result = check_command_security("echo")
assert result["action"] == "allow"
assert "tirith path unavailable" in result["summary"]
none_warnings = [
rec for rec in caplog.records
if "tirith path resolved to None" in rec.message
]
assert len(none_warnings) == 1