From 6b4fb9f8789717e5dad8d920cbd6cd02e53c5175 Mon Sep 17 00:00:00 2001 From: Tranquil-Flow Date: Sun, 3 May 2026 09:00:34 +1000 Subject: [PATCH] fix(cron): treat non-dict origin as missing instead of crashing tick MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``_resolve_origin`` called ``origin.get('platform')`` on whatever ``job.get('origin')`` returned. The leading ``if not origin: return None`` short-circuited the falsy cases (None, empty dict, "") but a non-empty string passed that guard and then crashed with ``AttributeError: 'str' object has no attribute 'get'`` on every fire attempt. Observed in the wild after a migration script tagged jobs with free-form provenance strings (e.g. ``"combined-digest-replaces-x-and-y-20260503"``). ``mark_job_run`` did record ``last_status: error, last_error: "'str' object has no attribute 'get'"`` once, but the next tick re-loaded the same poisoned origin and crashed identically. The job stayed enabled, fired every tick, and accumulated cascading errors in the log until ``origin`` was patched manually. Replace the falsy guard with ``isinstance(origin, dict)``. Non-dict origins (string, int, list, tuple, float — anything that survived a hand-edit, JSON-script write, or migration) are now treated the same as a missing origin: the job continues with ``deliver`` falling back through its normal home-channel path instead of crashing the scheduler loop. Test parametrises the non-dict shapes that can appear in jobs.json through external writers and asserts ``_resolve_origin`` returns None for each. Note: this fix scope is the non-dict-``origin`` crash only. The ``next_run_at: null`` recurring-job recovery (the second sub-bug in #18722) is independently addressed by the in-flight #18825, which extends the never-silently-disable defense from #16265 to ``get_due_jobs()`` — that approach is well-aligned with the existing recovery pattern and ships fine without a competing change here. Fixes #18722 (non-dict origin crash; recurring-job recovery covered by #18825) --- cron/scheduler.py | 14 ++++++++++++-- tests/cron/test_scheduler.py | 23 +++++++++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/cron/scheduler.py b/cron/scheduler.py index fafcbfab95..2cb1547ad3 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -123,9 +123,19 @@ _LOCK_FILE = _LOCK_DIR / ".tick.lock" def _resolve_origin(job: dict) -> Optional[dict]: - """Extract origin info from a job, preserving any extra routing metadata.""" + """Extract origin info from a job, preserving any extra routing metadata. + + Treats non-dict origins (free-form provenance strings, ints, lists from + migration scripts or hand-edited jobs.json) as missing instead of + crashing with ``AttributeError`` on ``origin.get(...)``. Without this + guard, a job tagged with e.g. ``"combined-digest-replaces-x-and-y"`` + crashed every fire attempt with + ``'str' object has no attribute 'get'`` — ``mark_job_run`` recorded the + failure, but the next tick re-loaded the same poisoned origin and + crashed identically until the field was patched manually (#18722). + """ origin = job.get("origin") - if not origin: + if not isinstance(origin, dict): return None platform = origin.get("platform") chat_id = origin.get("chat_id") diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py index 8c204d9a51..b12bb578a3 100644 --- a/tests/cron/test_scheduler.py +++ b/tests/cron/test_scheduler.py @@ -46,6 +46,29 @@ class TestResolveOrigin: job = {"origin": {}} assert _resolve_origin(job) is None + @pytest.mark.parametrize( + "non_dict_origin", + [ + "combined-digest-replaces-x-and-y-20260503", + 123, + ["telegram", "12345"], + ("platform", "chat_id"), + 42.0, + ], + ) + def test_non_dict_origin_returns_none_instead_of_crashing(self, non_dict_origin): + """Non-dict origins (provenance strings from hand-edited or migrated + jobs.json) must be treated as missing instead of crashing the + scheduler tick on ``origin.get('platform')`` with + ``'str' object has no attribute 'get'`` (#18722). + + Before this guard a job in this state crashed every fire attempt + forever; ``mark_job_run`` recorded the error but the next tick + re-loaded the poisoned origin and crashed identically. + """ + job = {"origin": non_dict_origin} + assert _resolve_origin(job) is None + class TestResolveDeliveryTarget: def test_origin_delivery_preserves_thread_id(self):