From 8d8a6c30c6daae511aaac892def7f47d6072d599 Mon Sep 17 00:00:00 2001 From: "Brian D. Evans" <252620095+briandevans@users.noreply.github.com> Date: Sat, 18 Apr 2026 15:31:28 +0100 Subject: [PATCH] fix(homeassistant): don't consume cooldown on no-op state_changed events (#12062) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``HomeAssistantAdapter._handle_ha_event`` writes the per-entity cooldown timestamp *before* calling ``_format_state_change``, which is what actually decides whether the event will be forwarded. For events where ``old_state == new_state`` (or where ``new_state`` is missing), the formatter returns ``None`` and the function returns early — but ``self._last_event_time[entity_id]`` has already been advanced. As a result, a rapid no-op event "uses up" the cooldown window and suppresses the next genuine state change. Reporter: #12062. Root cause ---------- ``gateway/platforms/homeassistant.py`` lines 286-299:: # Apply cooldown now = time.time() last = self._last_event_time.get(entity_id, 0) if (now - last) < self._cooldown_seconds: return self._last_event_time[entity_id] = now # <- advanced before we know # the event forwards old_state = event_data.get("old_state", {}) new_state = event_data.get("new_state", {}) message = self._format_state_change(entity_id, old_state, new_state) if not message: # <- no-op / malformed → None, return # but cooldown already burned Fix --- Keep the cooldown *check* early (so throttled events don't waste time formatting), but move the cooldown *write* to after ``_format_state_change`` returns a non-empty message. Only events that are actually forwarded consume the cooldown window. No API / config / public-behaviour change. Two lines effectively swapped; one comment added. Reproduction (confirmed on origin/main ``6fb69229``) ---------------------------------------------------- :: ha = HomeAssistantAdapter(PlatformConfig(enabled=True, token='t', extra={ 'url': 'http://x', 'watch_all': True, 'cooldown_seconds': 60, })) ha.handle_message = AsyncMock() await ha._handle_ha_event({'data': {'entity_id': 'sensor.temp', 'old_state': {'state': '20'}, 'new_state': {'state': '20', 'attributes': {}}}}) await ha._handle_ha_event({'data': {'entity_id': 'sensor.temp', 'old_state': {'state': '20'}, 'new_state': {'state': '21', 'attributes': {}}}}) assert ha.handle_message.await_count == 1 # fails on main (0) Side benefit ------------ ``_last_event_time`` no longer grows unbounded with entries for entities that only ever emit no-op events. Regression coverage ------------------- ``tests/gateway/test_homeassistant.py`` gets a new ``TestCooldownIssue12062`` class with 5 cases: * ``test_no_op_state_change_does_not_consume_cooldown`` — reporter's exact scenario. * ``test_no_op_does_not_write_last_event_time`` — structural pin on the cooldown map. * ``test_missing_new_state_does_not_consume_cooldown`` — covers the other ``_format_state_change → None`` branch. * ``test_forwarded_event_still_consumes_cooldown`` — preserved- behaviour canary so the fix can't silently disable cooldown. * ``test_no_op_then_real_change_across_entities`` — independent per-entity accounting. 4 of the 5 fail on clean ``origin/main`` with the reporter symptom; the 5th pins preserved behaviour. Validation ---------- ``source venv/bin/activate && python -m pytest tests/gateway/test_homeassistant.py -q`` → **50 passed** (45 pre-existing + 5 new). Broader ``tests/gateway`` under ``-n auto`` → 13 pre-existing baseline failures (dingtalk card lifecycle, matrix encrypted upload, approve/deny E2E, whatsapp bridge runtime / xdist flakes). Zero in ``test_homeassistant.py`` or any touched code path. Co-Authored-By: Claude Opus 4.6 --- gateway/platforms/homeassistant.py | 9 ++- tests/gateway/test_homeassistant.py | 109 ++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+), 2 deletions(-) diff --git a/gateway/platforms/homeassistant.py b/gateway/platforms/homeassistant.py index 746465594c..da1dd318ba 100644 --- a/gateway/platforms/homeassistant.py +++ b/gateway/platforms/homeassistant.py @@ -283,12 +283,14 @@ class HomeAssistantAdapter(BasePlatformAdapter): # No filters configured and watch_all is off — drop the event return - # Apply cooldown + # Apply cooldown check — but do NOT record the timestamp yet. + # Unchanged or malformed events (where ``_format_state_change`` + # returns ``None`` below) must not consume the cooldown window + # and suppress the next real state change. See #12062. now = time.time() last = self._last_event_time.get(entity_id, 0) if (now - last) < self._cooldown_seconds: return - self._last_event_time[entity_id] = now # Build human-readable message old_state = event_data.get("old_state", {}) @@ -298,6 +300,9 @@ class HomeAssistantAdapter(BasePlatformAdapter): if not message: return + # Only consume the cooldown window for events we actually forward. + self._last_event_time[entity_id] = now + # Build MessageEvent and forward to handler source = self.build_source( chat_id="ha_events", diff --git a/tests/gateway/test_homeassistant.py b/tests/gateway/test_homeassistant.py index b4ff5d8a35..cbc3f25add 100644 --- a/tests/gateway/test_homeassistant.py +++ b/tests/gateway/test_homeassistant.py @@ -415,6 +415,115 @@ class TestCooldown: assert adapter.handle_message.call_count == 5 +# --------------------------------------------------------------------------- +# Regression coverage: #12062 — no-op events must not consume cooldown. +# +# The cooldown timestamp is meant to throttle *forwarded* messages. Events +# that the adapter decides not to forward (state didn't actually change, +# new_state missing, etc.) must leave the cooldown window untouched so the +# next genuine change still reaches handle_message. +# --------------------------------------------------------------------------- + + +class TestCooldownIssue12062: + @pytest.mark.asyncio + async def test_no_op_state_change_does_not_consume_cooldown(self): + """Reporter's repro: no-op followed by a real change within the + cooldown window must still forward the real change.""" + adapter = _make_adapter(watch_all=True, cooldown_seconds=60) + + # First event: old_state == new_state — adapter must not forward, + # and must not consume the cooldown window. + await adapter._handle_ha_event( + _make_event("sensor.temp", "20", "20", + new_attrs={"friendly_name": "Temp"}) + ) + adapter.handle_message.assert_not_called() + + # Second event: real change, fired immediately. Under the bug the + # no-op had already written _last_event_time, so this gets dropped. + await adapter._handle_ha_event( + _make_event("sensor.temp", "20", "21", + new_attrs={"friendly_name": "Temp"}) + ) + assert adapter.handle_message.await_count == 1 + + @pytest.mark.asyncio + async def test_no_op_does_not_write_last_event_time(self): + """Direct structural pin: the cooldown map must be empty after + a no-op event so a future refactor can't reintroduce the leak.""" + adapter = _make_adapter(watch_all=True, cooldown_seconds=60) + + await adapter._handle_ha_event( + _make_event("sensor.temp", "on", "on", + new_attrs={"friendly_name": "Temp"}) + ) + + assert adapter.handle_message.await_count == 0 + assert "sensor.temp" not in adapter._last_event_time + + @pytest.mark.asyncio + async def test_missing_new_state_does_not_consume_cooldown(self): + """Malformed event (no new_state payload) must also not block the + next real change.""" + adapter = _make_adapter(watch_all=True, cooldown_seconds=60) + + # _format_state_change returns None for empty new_state too; + # cooldown must not be consumed in that branch either. + await adapter._handle_ha_event({ + "data": { + "entity_id": "sensor.temp", + "old_state": {"state": "20", "attributes": {}}, + "new_state": {}, # empty → formatter returns None + } + }) + adapter.handle_message.assert_not_called() + assert "sensor.temp" not in adapter._last_event_time + + await adapter._handle_ha_event( + _make_event("sensor.temp", "20", "21", + new_attrs={"friendly_name": "Temp"}) + ) + assert adapter.handle_message.await_count == 1 + + @pytest.mark.asyncio + async def test_forwarded_event_still_consumes_cooldown(self): + """Preserved behaviour canary: once an event *is* forwarded, the + cooldown window must close for subsequent rapid events.""" + adapter = _make_adapter(watch_all=True, cooldown_seconds=60) + + await adapter._handle_ha_event( + _make_event("sensor.temp", "20", "21", + new_attrs={"friendly_name": "Temp"}) + ) + assert adapter.handle_message.await_count == 1 + assert "sensor.temp" in adapter._last_event_time + + # Rapid follow-up real change — cooldown blocks it (unchanged). + await adapter._handle_ha_event( + _make_event("sensor.temp", "21", "22", + new_attrs={"friendly_name": "Temp"}) + ) + assert adapter.handle_message.await_count == 1 + + @pytest.mark.asyncio + async def test_no_op_then_real_change_across_entities(self): + """Regression: a no-op for entity A must not affect entity B's + independent cooldown accounting.""" + adapter = _make_adapter(watch_all=True, cooldown_seconds=60) + + await adapter._handle_ha_event( + _make_event("sensor.a", "1", "1", new_attrs={"friendly_name": "A"}) + ) + await adapter._handle_ha_event( + _make_event("sensor.b", "3", "4", new_attrs={"friendly_name": "B"}) + ) + # A was a no-op (not forwarded), B is a real change (forwarded once) + assert adapter.handle_message.await_count == 1 + assert "sensor.a" not in adapter._last_event_time + assert "sensor.b" in adapter._last_event_time + + # --------------------------------------------------------------------------- # Config integration (env overrides, round-trip) # ---------------------------------------------------------------------------