mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(gateway): handle stale lock files in acquire_scoped_lock
Updated the acquire_scoped_lock function to treat empty or corrupt lock files as stale. This change ensures that if a lock file exists but is invalid, it will be removed to prevent issues with stale locks. Added tests to verify recovery from both empty and corrupt lock files.
This commit is contained in:
parent
23f668d66e
commit
c1809e85e7
2 changed files with 36 additions and 0 deletions
|
|
@@ -290,6 +290,15 @@ def acquire_scoped_lock(scope: str, identity: str, metadata: Optional[dict[str,
|
|||
}
|
||||
|
||||
existing = _read_json_file(lock_path)
|
||||
if existing is None and lock_path.exists():
|
||||
# Lock file exists but is empty or contains invalid JSON — treat as
|
||||
# stale. This happens when a previous process was killed between
|
||||
# O_CREAT|O_EXCL and the subsequent json.dump() (e.g. DNS failure
|
||||
# during rapid Slack reconnect retries).
|
||||
try:
|
||||
lock_path.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass
|
||||
if existing:
|
||||
try:
|
||||
existing_pid = int(existing["pid"])
|
||||
|
|
|
|||
|
|
@@ -209,6 +209,33 @@ class TestScopedLocks:
|
|||
assert payload["pid"] == os.getpid()
|
||||
assert payload["metadata"]["platform"] == "telegram"
|
||||
|
||||
def test_acquire_scoped_lock_recovers_empty_lock_file(self, tmp_path, monkeypatch):
|
||||
"""Empty lock file (0 bytes) left by a crashed process should be treated as stale."""
|
||||
monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(tmp_path / "locks"))
|
||||
lock_path = tmp_path / "locks" / "slack-app-token-2bb80d537b1da3e3.lock"
|
||||
lock_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
lock_path.write_text("") # simulate crash between O_CREAT and json.dump
|
||||
|
||||
acquired, existing = status.acquire_scoped_lock("slack-app-token", "secret", metadata={"platform": "slack"})
|
||||
|
||||
assert acquired is True
|
||||
payload = json.loads(lock_path.read_text())
|
||||
assert payload["pid"] == os.getpid()
|
||||
assert payload["metadata"]["platform"] == "slack"
|
||||
|
||||
def test_acquire_scoped_lock_recovers_corrupt_lock_file(self, tmp_path, monkeypatch):
|
||||
"""Lock file with invalid JSON should be treated as stale."""
|
||||
monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(tmp_path / "locks"))
|
||||
lock_path = tmp_path / "locks" / "slack-app-token-2bb80d537b1da3e3.lock"
|
||||
lock_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
lock_path.write_text("{truncated") # simulate partial write
|
||||
|
||||
acquired, existing = status.acquire_scoped_lock("slack-app-token", "secret", metadata={"platform": "slack"})
|
||||
|
||||
assert acquired is True
|
||||
payload = json.loads(lock_path.read_text())
|
||||
assert payload["pid"] == os.getpid()
|
||||
|
||||
def test_release_scoped_lock_only_removes_current_owner(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(tmp_path / "locks"))
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue