fix(homeassistant): don't consume cooldown on no-op state_changed events (#12062)

``HomeAssistantAdapter._handle_ha_event`` writes the per-entity cooldown timestamp *before* calling ``_format_state_change``, which is what actually decides whether the event will be forwarded. For events where ``old_state == new_state`` (or where ``new_state`` is missing), the formatter returns ``None`` and the function returns early — but ``self._last_event_time[entity_id]`` has already been advanced. As a result, a rapid no-op event "uses up" the cooldown window and suppresses the next genuine state change. Reporter: #12062. Root cause ---------- ``gateway/platforms/homeassistant.py`` lines 286-299:: # Apply cooldown now = time.time() last = self._last_event_time.get(entity_id, 0) if (now - last) < self._cooldown_seconds: return self._last_event_time[entity_id] = now # <- advanced before we know # the event forwards old_state = event_data.get("old_state", {}) new_state = event_data.get("new_state", {}) message = self._format_state_change(entity_id, old_state, new_state) if not message: # <- no-op / malformed → None, return # but cooldown already burned Fix --- Keep the cooldown *check* early (so throttled events don't waste time formatting), but move the cooldown *write* to after ``_format_state_change`` returns a non-empty message. Only events that are actually forwarded consume the cooldown window. No API / config / public-behaviour change. Two lines effectively swapped; one comment added. Reproduction (confirmed on origin/main ``6fb69229``) ---------------------------------------------------- :: ha = HomeAssistantAdapter(PlatformConfig(enabled=True, token='t', extra={ 'url': 'http://x', 'watch_all': True, 'cooldown_seconds': 60, })) ha.handle_message = AsyncMock() await ha._handle_ha_event({'data': {'entity_id': 'sensor.temp', 'old_state': {'state': '20'}, 'new_state': {'state': '20', 'attributes': {}}}}) await ha._handle_ha_event({'data': {'entity_id': 'sensor.temp', 'old_state': {'state': '20'}, 'new_state': {'state': '21', 'attributes': {}}}}) assert ha.handle_message.await_count == 1 # fails on main (0) Side benefit ------------ ``_last_event_time`` no longer grows unbounded with entries for entities that only ever emit no-op events. Regression coverage ------------------- ``tests/gateway/test_homeassistant.py`` gets a new ``TestCooldownIssue12062`` class with 5 cases: * ``test_no_op_state_change_does_not_consume_cooldown`` — reporter's exact scenario. * ``test_no_op_does_not_write_last_event_time`` — structural pin on the cooldown map. * ``test_missing_new_state_does_not_consume_cooldown`` — covers the other ``_format_state_change → None`` branch. * ``test_forwarded_event_still_consumes_cooldown`` — preserved- behaviour canary so the fix can't silently disable cooldown. * ``test_no_op_then_real_change_across_entities`` — independent per-entity accounting. 4 of the 5 fail on clean ``origin/main`` with the reporter symptom; the 5th pins preserved behaviour. Validation ---------- ``source venv/bin/activate && python -m pytest tests/gateway/test_homeassistant.py -q`` → **50 passed** (45 pre-existing + 5 new). Broader ``tests/gateway`` under ``-n auto`` → 13 pre-existing baseline failures (dingtalk card lifecycle, matrix encrypted upload, approve/deny E2E, whatsapp bridge runtime / xdist flakes). Zero in ``test_homeassistant.py`` or any touched code path. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-25 00:51:20 +00:00 · 2026-04-18 15:31:28 +01:00 · 2026-04-18 15:31:28 +01:00 · 8d8a6c30c6
commit 8d8a6c30c6
parent 6fb69229ca
2 changed files with 116 additions and 2 deletions
--- a/gateway/platforms/homeassistant.py
+++ b/gateway/platforms/homeassistant.py
@ -283,12 +283,14 @@ class HomeAssistantAdapter(BasePlatformAdapter):
            # No filters configured and watch_all is off — drop the event
            return

-        # Apply cooldown
+        # Apply cooldown check — but do NOT record the timestamp yet.
+        # Unchanged or malformed events (where ``_format_state_change``
+        # returns ``None`` below) must not consume the cooldown window
+        # and suppress the next real state change.  See #12062.
        now = time.time()
        last = self._last_event_time.get(entity_id, 0)
        if (now - last) < self._cooldown_seconds:
            return
-        self._last_event_time[entity_id] = now

        # Build human-readable message
        old_state = event_data.get("old_state", {})
@ -298,6 +300,9 @@ class HomeAssistantAdapter(BasePlatformAdapter):
        if not message:
            return

+        # Only consume the cooldown window for events we actually forward.
+        self._last_event_time[entity_id] = now
+
        # Build MessageEvent and forward to handler
        source = self.build_source(
            chat_id="ha_events",
--- a/tests/gateway/test_homeassistant.py
+++ b/tests/gateway/test_homeassistant.py
@ -415,6 +415,115 @@ class TestCooldown:
        assert adapter.handle_message.call_count == 5


+# ---------------------------------------------------------------------------
+# Regression coverage: #12062 — no-op events must not consume cooldown.
+#
+# The cooldown timestamp is meant to throttle *forwarded* messages. Events
+# that the adapter decides not to forward (state didn't actually change,
+# new_state missing, etc.) must leave the cooldown window untouched so the
+# next genuine change still reaches handle_message.
+# ---------------------------------------------------------------------------
+
+
+class TestCooldownIssue12062:
+    @pytest.mark.asyncio
+    async def test_no_op_state_change_does_not_consume_cooldown(self):
+        """Reporter's repro: no-op followed by a real change within the
+        cooldown window must still forward the real change."""
+        adapter = _make_adapter(watch_all=True, cooldown_seconds=60)
+
+        # First event: old_state == new_state — adapter must not forward,
+        # and must not consume the cooldown window.
+        await adapter._handle_ha_event(
+            _make_event("sensor.temp", "20", "20",
+                        new_attrs={"friendly_name": "Temp"})
+        )
+        adapter.handle_message.assert_not_called()
+
+        # Second event: real change, fired immediately. Under the bug the
+        # no-op had already written _last_event_time, so this gets dropped.
+        await adapter._handle_ha_event(
+            _make_event("sensor.temp", "20", "21",
+                        new_attrs={"friendly_name": "Temp"})
+        )
+        assert adapter.handle_message.await_count == 1
+
+    @pytest.mark.asyncio
+    async def test_no_op_does_not_write_last_event_time(self):
+        """Direct structural pin: the cooldown map must be empty after
+        a no-op event so a future refactor can't reintroduce the leak."""
+        adapter = _make_adapter(watch_all=True, cooldown_seconds=60)
+
+        await adapter._handle_ha_event(
+            _make_event("sensor.temp", "on", "on",
+                        new_attrs={"friendly_name": "Temp"})
+        )
+
+        assert adapter.handle_message.await_count == 0
+        assert "sensor.temp" not in adapter._last_event_time
+
+    @pytest.mark.asyncio
+    async def test_missing_new_state_does_not_consume_cooldown(self):
+        """Malformed event (no new_state payload) must also not block the
+        next real change."""
+        adapter = _make_adapter(watch_all=True, cooldown_seconds=60)
+
+        # _format_state_change returns None for empty new_state too;
+        # cooldown must not be consumed in that branch either.
+        await adapter._handle_ha_event({
+            "data": {
+                "entity_id": "sensor.temp",
+                "old_state": {"state": "20", "attributes": {}},
+                "new_state": {},  # empty → formatter returns None
+            }
+        })
+        adapter.handle_message.assert_not_called()
+        assert "sensor.temp" not in adapter._last_event_time
+
+        await adapter._handle_ha_event(
+            _make_event("sensor.temp", "20", "21",
+                        new_attrs={"friendly_name": "Temp"})
+        )
+        assert adapter.handle_message.await_count == 1
+
+    @pytest.mark.asyncio
+    async def test_forwarded_event_still_consumes_cooldown(self):
+        """Preserved behaviour canary: once an event *is* forwarded, the
+        cooldown window must close for subsequent rapid events."""
+        adapter = _make_adapter(watch_all=True, cooldown_seconds=60)
+
+        await adapter._handle_ha_event(
+            _make_event("sensor.temp", "20", "21",
+                        new_attrs={"friendly_name": "Temp"})
+        )
+        assert adapter.handle_message.await_count == 1
+        assert "sensor.temp" in adapter._last_event_time
+
+        # Rapid follow-up real change — cooldown blocks it (unchanged).
+        await adapter._handle_ha_event(
+            _make_event("sensor.temp", "21", "22",
+                        new_attrs={"friendly_name": "Temp"})
+        )
+        assert adapter.handle_message.await_count == 1
+
+    @pytest.mark.asyncio
+    async def test_no_op_then_real_change_across_entities(self):
+        """Regression: a no-op for entity A must not affect entity B's
+        independent cooldown accounting."""
+        adapter = _make_adapter(watch_all=True, cooldown_seconds=60)
+
+        await adapter._handle_ha_event(
+            _make_event("sensor.a", "1", "1", new_attrs={"friendly_name": "A"})
+        )
+        await adapter._handle_ha_event(
+            _make_event("sensor.b", "3", "4", new_attrs={"friendly_name": "B"})
+        )
+        # A was a no-op (not forwarded), B is a real change (forwarded once)
+        assert adapter.handle_message.await_count == 1
+        assert "sensor.a" not in adapter._last_event_time
+        assert "sensor.b" in adapter._last_event_time
+
+
 # ---------------------------------------------------------------------------
 # Config integration (env overrides, round-trip)
 # ---------------------------------------------------------------------------