fix(cron): fall back gracefully when HERMES_CRON_TIMEOUT is invalid

Bare `float(os.getenv("HERMES_CRON_TIMEOUT", 600))` in `run_job()` raises
a `ValueError` when the env var is set to a non-numeric string (e.g. "abc").
Replace it with the same defensive try/except pattern already used by
`_get_script_timeout()` for `HERMES_CRON_SCRIPT_TIMEOUT`: log a warning
and fall back to the 600 s default instead of crashing.

Also update the existing env-var tests to exercise the new code path and
add two new tests — one for an invalid value, one for an empty string.

Fixes #11319

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
yeyitech 2026-04-17 11:24:10 +08:00 committed by Teknium
parent 8c8fc6c1ec
commit ec27f0a3fa
2 changed files with 41 additions and 3 deletions

View file

@ -169,10 +169,20 @@ class TestInactivityTimeout:
assert result["final_response"] == "Done"
def _parse_cron_timeout(self, raw_value):
"""Mirror the defensive parsing logic from cron/scheduler.py run_job()."""
if raw_value:
try:
return float(raw_value)
except (ValueError, TypeError):
return 600.0
return 600.0
def test_timeout_env_var_parsing(self, monkeypatch):
"""HERMES_CRON_TIMEOUT env var is respected."""
monkeypatch.setenv("HERMES_CRON_TIMEOUT", "1200")
_cron_timeout = float(os.getenv("HERMES_CRON_TIMEOUT", 600))
raw = os.getenv("HERMES_CRON_TIMEOUT", "").strip()
_cron_timeout = self._parse_cron_timeout(raw)
assert _cron_timeout == 1200.0
_cron_inactivity_limit = _cron_timeout if _cron_timeout > 0 else None
@ -181,10 +191,27 @@ class TestInactivityTimeout:
def test_timeout_zero_means_unlimited(self, monkeypatch):
"""HERMES_CRON_TIMEOUT=0 yields None (unlimited)."""
monkeypatch.setenv("HERMES_CRON_TIMEOUT", "0")
_cron_timeout = float(os.getenv("HERMES_CRON_TIMEOUT", 600))
raw = os.getenv("HERMES_CRON_TIMEOUT", "").strip()
_cron_timeout = self._parse_cron_timeout(raw)
_cron_inactivity_limit = _cron_timeout if _cron_timeout > 0 else None
assert _cron_inactivity_limit is None
def test_timeout_invalid_value_falls_back_to_default(self, monkeypatch):
"""HERMES_CRON_TIMEOUT=abc should fall back to 600s, not raise ValueError."""
monkeypatch.setenv("HERMES_CRON_TIMEOUT", "abc")
raw = os.getenv("HERMES_CRON_TIMEOUT", "").strip()
_cron_timeout = self._parse_cron_timeout(raw)
assert _cron_timeout == 600.0
_cron_inactivity_limit = _cron_timeout if _cron_timeout > 0 else None
assert _cron_inactivity_limit == 600.0
def test_timeout_empty_string_uses_default(self, monkeypatch):
"""HERMES_CRON_TIMEOUT='' (empty) should use the 600s default."""
monkeypatch.setenv("HERMES_CRON_TIMEOUT", "")
raw = os.getenv("HERMES_CRON_TIMEOUT", "").strip()
_cron_timeout = self._parse_cron_timeout(raw)
assert _cron_timeout == 600.0
def test_timeout_error_includes_diagnostics(self):
"""The TimeoutError message should include last activity info."""
agent = SlowFakeAgent(