fix(pairing): handle legacy plaintext pending entries during upgrade

When an existing install upgrades to the hashed-pending schema, its
on-disk pending.json still has the old {code: entry} format with no
hash/salt fields. The original PR #8056 assumed every entry had both
fields and would have KeyErrored in approve_code, list_pending, and
_cleanup_expired.

Guard each consumer:
  - approve_code: skip entries that are not a dict, lack salt/hash,
    or have a non-hex salt. Legacy entries simply fail to match.
  - list_pending: tolerate missing 'hash' (show "legacy" placeholder)
    and non-numeric created_at (skip the row).
  - _cleanup_expired: treat malformed/legacy entries as expired so
    they get pruned on the next call rather than wedging the file.

Regression tests cover all three consumers plus a mixed-malformed
case.
This commit is contained in:
Teknium 2026-05-22 04:08:26 -07:00
parent 2e509422ef
commit 82c2035823
2 changed files with 147 additions and 12 deletions

View file

@ -220,7 +220,9 @@ class PairingStore:
Verification: the user-provided code is hashed with each stored
entry's salt and compared to the stored hash using constant-time
comparison.
comparison. Pre-hash entries (legacy plaintext-key format from
pre-upgrade pending.json files) are silently ignored they get
pruned at TTL by ``_cleanup_expired``.
"""
with self._lock:
self._cleanup_expired(platform)
@ -236,11 +238,23 @@ class PairingStore:
pending = self._load_json(self._pending_path(platform))
# Find the entry whose hash matches the provided code
# Find the entry whose hash matches the provided code.
# Tolerate legacy plaintext-key entries (no salt/hash) and
# malformed entries — skip them rather than KeyError, so an
# in-place upgrade across an existing pending.json doesn't
# crash on the first approve call. Legacy entries get pruned
# at their TTL by _cleanup_expired.
matched_key = None
matched_entry = None
for entry_id, entry in pending.items():
salt = bytes.fromhex(entry["salt"])
if not isinstance(entry, dict):
continue
if "salt" not in entry or "hash" not in entry:
continue
try:
salt = bytes.fromhex(entry["salt"])
except ValueError:
continue
candidate_hash = self._hash_code(code, salt)
if secrets.compare_digest(candidate_hash, entry["hash"]):
matched_key = entry_id
@ -268,7 +282,9 @@ class PairingStore:
Codes are stored hashed the ``code`` field is replaced with the
first 8 hex characters of the hash so admins can distinguish entries
without revealing the original code.
without revealing the original code. Legacy plaintext-key entries
(pre-hash format) are shown with a "legacy" placeholder so admins
can see them age out without crashing on a missing ``hash`` field.
"""
results = []
platforms = [platform] if platform else self._all_platforms("pending")
@ -276,11 +292,18 @@ class PairingStore:
self._cleanup_expired(p)
pending = self._load_json(self._pending_path(p))
for entry_id, info in pending.items():
age_min = int((time.time() - info["created_at"]) / 60)
if not isinstance(info, dict):
continue
created_at = info.get("created_at")
if not isinstance(created_at, (int, float)):
continue
age_min = int((time.time() - created_at) / 60)
hash_val = info.get("hash")
code_display = hash_val[:8] if isinstance(hash_val, str) else "legacy"
results.append({
"platform": p,
"code": info["hash"][:8],
"user_id": info["user_id"],
"code": code_display,
"user_id": info.get("user_id", ""),
"user_name": info.get("user_name", ""),
"age_minutes": age_min,
})
@ -337,14 +360,26 @@ class PairingStore:
# ----- Cleanup -----
def _cleanup_expired(self, platform: str) -> None:
"""Remove expired pending codes."""
"""Remove expired pending codes.
Tolerant of malformed / legacy entries anything without a numeric
``created_at`` is treated as expired (it's effectively unusable
with the new hash-keyed schema anyway).
"""
path = self._pending_path(platform)
pending = self._load_json(path)
now = time.time()
expired = [
entry_id for entry_id, info in pending.items()
if (now - info["created_at"]) > CODE_TTL_SECONDS
]
expired = []
for entry_id, info in pending.items():
if not isinstance(info, dict):
expired.append(entry_id)
continue
created_at = info.get("created_at")
if not isinstance(created_at, (int, float)):
expired.append(entry_id)
continue
if (now - created_at) > CODE_TTL_SECONDS:
expired.append(entry_id)
if expired:
for entry_id in expired:
del pending[entry_id]

View file

@ -168,6 +168,106 @@ class TestHashedStorage:
assert h3 != h1
class TestLegacyPendingFileCompat:
"""Defensive coverage for pre-hash pending.json on upgraded installs.
Existing user installs may have a pending.json written by the old
code (plaintext code as key, no hash/salt fields). The new
approve_code / list_pending / _cleanup_expired must not crash on
those entries they should be ignored and aged out at TTL.
"""
@staticmethod
def _write_legacy(tmp_path, code="ABCD1234", created_at=None):
"""Write a pre-hash pending.json with plaintext code as the key."""
import time as _time
if created_at is None:
created_at = _time.time()
legacy = {
code: {
"user_id": "legacy-user",
"user_name": "Legacy",
"created_at": created_at,
}
}
(tmp_path / "telegram-pending.json").write_text(
json.dumps(legacy), encoding="utf-8"
)
def test_approve_code_ignores_legacy_entries(self, tmp_path):
"""A valid old-format code must NOT silently approve under the new schema."""
with patch("gateway.pairing.PAIRING_DIR", tmp_path):
self._write_legacy(tmp_path, code="LEGACY01")
store = PairingStore()
# The plaintext "code" used to be the key — under the new schema
# it's not even looked at, and there's no hash/salt to verify.
# Result: approve_code returns None, the legacy entry is left
# alone (gets pruned by _cleanup_expired at TTL).
result = store.approve_code("telegram", "LEGACY01")
assert result is None
# Approved list must be empty
assert store.is_approved("telegram", "legacy-user") is False
def test_list_pending_handles_legacy_entries(self, tmp_path):
"""list_pending must not KeyError on a missing 'hash' field."""
with patch("gateway.pairing.PAIRING_DIR", tmp_path):
self._write_legacy(tmp_path)
store = PairingStore()
pending = store.list_pending("telegram")
assert len(pending) == 1
assert pending[0]["user_id"] == "legacy-user"
assert pending[0]["code"] == "legacy" # placeholder
def test_cleanup_expired_removes_legacy_at_ttl(self, tmp_path):
"""Legacy entries past CODE_TTL must still get pruned."""
import time as _time
with patch("gateway.pairing.PAIRING_DIR", tmp_path):
self._write_legacy(
tmp_path,
code="LEGACY99",
created_at=_time.time() - CODE_TTL_SECONDS - 1,
)
store = PairingStore()
store._cleanup_expired("telegram")
raw = json.loads(
(tmp_path / "telegram-pending.json").read_text(encoding="utf-8")
)
assert raw == {}
def test_cleanup_expired_handles_malformed_entries(self, tmp_path):
"""Non-dict / missing-created_at entries get evicted, not crashed on."""
with patch("gateway.pairing.PAIRING_DIR", tmp_path):
(tmp_path / "telegram-pending.json").write_text(
json.dumps({
"broken1": "not a dict",
"broken2": {"user_id": "x"}, # no created_at
"broken3": {"created_at": "not a number"},
}),
encoding="utf-8",
)
store = PairingStore()
store._cleanup_expired("telegram")
raw = json.loads(
(tmp_path / "telegram-pending.json").read_text(encoding="utf-8")
)
assert raw == {}
def test_approve_code_skips_malformed_entries(self, tmp_path):
"""Malformed entries must not crash approve_code's hash loop."""
import time as _time
with patch("gateway.pairing.PAIRING_DIR", tmp_path):
(tmp_path / "telegram-pending.json").write_text(
json.dumps({
"broken": {"user_id": "x", "created_at": _time.time(),
"salt": "not-hex", "hash": "doesntmatter"},
}),
encoding="utf-8",
)
store = PairingStore()
# Approving with any code must just return None, not crash.
assert store.approve_code("telegram", "ABCD1234") is None
# ---------------------------------------------------------------------------
# Rate limiting
# ---------------------------------------------------------------------------