mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
fix(container_boot): rotate container-boot.log when it exceeds 256 KiB
PR #30136 review noted: container-boot.log was append-only with no rotation. On a long-lived container with frequent restarts and many profiles it would grow unboundedly (~80 B per profile per reconcile pass). Add a soft cap: when the file size hits 256 KiB (`_LOG_ROTATE_BYTES`, ≈3000 reconcile lines, ≈1 year of daily reboots × 5 profiles), the current file is renamed to `container-boot.log.1` (replacing any existing one) before new entries are appended. Worst case is two files at ~512 KiB — well within visibility limits for grep/cat. Rotation is intentionally simple (no logrotate or s6-log machinery for one append-only file). Failures during rotation are logged via the module logger and treated as non-fatal — we keep appending to the existing file rather than dropping the reconcile entry. Three new unit tests cover above-threshold rotation, below-threshold non-rotation, and overwrite of an existing .1 file.
This commit is contained in:
parent
9914bfc594
commit
4443fb481d
2 changed files with 111 additions and 1 deletion
|
|
@ -229,12 +229,32 @@ def _write_reconcile_log(
|
|||
up". Keeping a separate log file (vs. mixing into agent.log) lets
|
||||
troubleshooters grep for "profile=foo" without wading through
|
||||
unrelated activity.
|
||||
|
||||
Size-bounded: when the file reaches or exceeds ``_LOG_ROTATE_BYTES``
|
||||
(defaults to 256 KiB ≈ 3000 reconcile lines), the current file
|
||||
is renamed to ``container-boot.log.1`` (replacing any previous
|
||||
rotation) before the new entries are appended. This gives long-
|
||||
lived containers a soft cap of ~512 KiB across the two files
|
||||
without pulling in logrotate or s6-log machinery just for this
|
||||
one append-only file (PR #30136 review item O3).
|
||||
"""
|
||||
import time
|
||||
log_dir = hermes_home / "logs"
|
||||
log_dir.mkdir(parents=True, exist_ok=True)
|
||||
log_path = log_dir / "container-boot.log"
|
||||
|
||||
# Rotate before opening to append, so the new entries always land
|
||||
# in a fresh file when we crossed the threshold last time.
|
||||
try:
|
||||
if log_path.exists() and log_path.stat().st_size >= _LOG_ROTATE_BYTES:
|
||||
log_path.replace(log_dir / "container-boot.log.1")
|
||||
except OSError as exc:
|
||||
# Rotation failure is non-fatal — keep appending to the
|
||||
# existing file rather than losing the entry entirely.
|
||||
log.warning("could not rotate %s: %s", log_path, exc)
|
||||
|
||||
ts = time.strftime("%Y-%m-%dT%H:%M:%S%z")
|
||||
with (log_dir / "container-boot.log").open("a", encoding="utf-8") as f:
|
||||
with log_path.open("a", encoding="utf-8") as f:
|
||||
for a in actions:
|
||||
f.write(
|
||||
f"{ts} profile={a.profile} prior_state={a.prior_state} "
|
||||
|
|
@ -242,6 +262,14 @@ def _write_reconcile_log(
|
|||
)
|
||||
|
||||
|
||||
# 256 KiB soft cap on container-boot.log; rotated to .1 when crossed.
|
||||
# At ~80 B per reconcile-action line this is ~3000 lines, or about a
|
||||
# year of daily reboots on a 5-profile container. Two files = ~512 KiB
|
||||
# worst case. Tuned for visibility (small enough to grep / cat without
|
||||
# scrolling forever) more than space (the persistent volume has GB).
|
||||
_LOG_ROTATE_BYTES = 256 * 1024
|
||||
|
||||
|
||||
def main() -> int:
|
||||
"""Entry point invoked from /etc/cont-init.d/02-reconcile-profiles."""
|
||||
hermes_home = Path(os.environ.get("HERMES_HOME", "/opt/data"))
|
||||
|
|
|
|||
|
|
@ -223,6 +223,88 @@ def test_reconcile_log_is_written(tmp_path: Path) -> None:
|
|||
assert "action=registered" in log
|
||||
|
||||
|
||||
def test_reconcile_log_rotates_when_size_exceeded(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An oversized container-boot.log is moved aside to ``.1`` first.

    The entries written by this reconcile pass must then land in a
    brand-new container-boot.log rather than after the old content.
    """
    from hermes_cli import container_boot

    # Shrink the cap so a 300-byte seed file is already over it.
    monkeypatch.setattr(container_boot, "_LOG_ROTATE_BYTES", 200)

    logs_dir = tmp_path / "logs"
    logs_dir.mkdir()
    live_log = logs_dir / "container-boot.log"
    seed = "X" * 300
    live_log.write_text(seed)  # already over the threshold

    scandir = tmp_path / "run-service"
    scandir.mkdir()
    _make_profile(tmp_path, "coder", state="running")

    reconcile_profile_gateways(
        hermes_home=tmp_path,
        scandir=scandir,
        dry_run=False,
    )

    archived = logs_dir / "container-boot.log.1"
    assert archived.exists(), "expected previous log to be rotated to .1"
    assert archived.read_text().startswith(seed)

    # The live log was recreated: none of the seed bytes, only new entries.
    fresh = live_log.read_text()
    assert "X" not in fresh
    assert "profile=coder" in fresh
|
||||
|
||||
|
||||
def test_reconcile_log_does_not_rotate_below_threshold(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A log under the cap keeps its content and gains the new entries.

    No ``container-boot.log.1`` file may appear in this case.
    """
    from hermes_cli import container_boot

    # Raise the cap far above anything this test writes.
    monkeypatch.setattr(container_boot, "_LOG_ROTATE_BYTES", 10_000_000)

    logs_dir = tmp_path / "logs"
    logs_dir.mkdir()
    live_log = logs_dir / "container-boot.log"
    earlier = "previous entry\n"
    live_log.write_text(earlier)

    scandir = tmp_path / "run-service"
    scandir.mkdir()
    _make_profile(tmp_path, "coder", state="running")

    reconcile_profile_gateways(
        hermes_home=tmp_path,
        scandir=scandir,
        dry_run=False,
    )

    archived = logs_dir / "container-boot.log.1"
    assert not archived.exists()

    # Old content stays at the front; the new entry follows it.
    combined = live_log.read_text()
    assert combined.startswith(earlier)
    assert "profile=coder" in combined
|
||||
|
||||
|
||||
def test_reconcile_log_rotation_overwrites_existing_dot1(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A second rotation replaces the earlier ``.1`` file.

    Only one rotated file is ever kept, so after rotation ``.1`` must
    hold the content of the previous live log, not the older rotation.
    """
    from hermes_cli import container_boot

    monkeypatch.setattr(container_boot, "_LOG_ROTATE_BYTES", 200)

    logs_dir = tmp_path / "logs"
    logs_dir.mkdir()
    archived = logs_dir / "container-boot.log.1"
    live_log = logs_dir / "container-boot.log"
    oversized = "Y" * 300
    archived.write_text("OLD ROTATION")
    live_log.write_text(oversized)

    scandir = tmp_path / "run-service"
    scandir.mkdir()
    _make_profile(tmp_path, "coder", state="running")

    reconcile_profile_gateways(
        hermes_home=tmp_path,
        scandir=scandir,
        dry_run=False,
    )

    # .1 now contains the previous .log (Ys), not OLD ROTATION.
    archived_text = archived.read_text()
    assert "OLD ROTATION" not in archived_text
    assert archived_text.startswith(oversized)
|
||||
|
||||
|
||||
def test_dry_run_makes_no_filesystem_changes(tmp_path: Path) -> None:
|
||||
scandir = tmp_path / "run-service"; scandir.mkdir()
|
||||
profile = _make_profile(tmp_path, "coder", state="running", with_pid=True)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue