fix(homeassistant): don't consume cooldown on no-op state_changed events (#12062)

``HomeAssistantAdapter._handle_ha_event`` writes the per-entity cooldown
timestamp *before* calling ``_format_state_change``, which is what
actually decides whether the event will be forwarded.  For events
where ``old_state == new_state`` (or where ``new_state`` is missing),
the formatter returns ``None`` and the function returns early — but
``self._last_event_time[entity_id]`` has already been advanced.

As a result, a rapid no-op event "uses up" the cooldown window and
suppresses the next genuine state change.  Reporter: #12062.

Root cause
----------
``gateway/platforms/homeassistant.py`` lines 286-299::

    # Apply cooldown
    now = time.time()
    last = self._last_event_time.get(entity_id, 0)
    if (now - last) < self._cooldown_seconds:
        return
    self._last_event_time[entity_id] = now   # <- advanced before we know
                                             #    the event forwards

    old_state = event_data.get("old_state", {})
    new_state = event_data.get("new_state", {})
    message = self._format_state_change(entity_id, old_state, new_state)

    if not message:                           # <- no-op / malformed → None,
        return                                #    but cooldown already burned

Fix
---
Keep the cooldown *check* early (so throttled events don't waste time
formatting), but move the cooldown *write* to after ``_format_state_change``
returns a non-empty message.  Only events that are actually forwarded
consume the cooldown window.

No API / config / public-behaviour change.  Two lines effectively
swapped; one comment added.

Reproduction (confirmed on origin/main ``6fb69229``)
----------------------------------------------------
::

    ha = HomeAssistantAdapter(PlatformConfig(enabled=True, token='t', extra={
        'url': 'http://x', 'watch_all': True, 'cooldown_seconds': 60,
    }))
    ha.handle_message = AsyncMock()
    await ha._handle_ha_event({'data': {'entity_id': 'sensor.temp',
        'old_state': {'state': '20'},
        'new_state': {'state': '20', 'attributes': {}}}})
    await ha._handle_ha_event({'data': {'entity_id': 'sensor.temp',
        'old_state': {'state': '20'},
        'new_state': {'state': '21', 'attributes': {}}}})
    assert ha.handle_message.await_count == 1   # fails on main (0)

Side benefit
------------
``_last_event_time`` no longer grows unbounded with entries for
entities that only ever emit no-op events.

Regression coverage
-------------------
``tests/gateway/test_homeassistant.py`` gets a new
``TestCooldownIssue12062`` class with 5 cases:

* ``test_no_op_state_change_does_not_consume_cooldown`` — reporter's
  exact scenario.
* ``test_no_op_does_not_write_last_event_time`` — structural pin on
  the cooldown map.
* ``test_missing_new_state_does_not_consume_cooldown`` — covers the
  other ``_format_state_change → None`` branch.
* ``test_forwarded_event_still_consumes_cooldown`` — preserved-
  behaviour canary so the fix can't silently disable cooldown.
* ``test_no_op_then_real_change_across_entities`` — independent
  per-entity accounting.

4 of the 5 fail on clean ``origin/main`` with the reporter symptom;
the 5th pins preserved behaviour.

Validation
----------
``source venv/bin/activate && python -m pytest
tests/gateway/test_homeassistant.py -q`` → **50 passed** (45
pre-existing + 5 new).

Broader ``tests/gateway`` under ``-n auto`` → 13 pre-existing
baseline failures (dingtalk card lifecycle, matrix encrypted upload,
approve/deny E2E, whatsapp bridge runtime / xdist flakes).  Zero in
``test_homeassistant.py`` or any touched code path.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Brian D. Evans 2026-04-18 15:31:28 +01:00
parent 6fb69229ca
commit 8d8a6c30c6
2 changed files with 116 additions and 2 deletions

View file

@ -283,12 +283,14 @@ class HomeAssistantAdapter(BasePlatformAdapter):
# No filters configured and watch_all is off — drop the event
return
# Apply cooldown
# Apply cooldown check — but do NOT record the timestamp yet.
# Unchanged or malformed events (where ``_format_state_change``
# returns ``None`` below) must not consume the cooldown window
# and suppress the next real state change. See #12062.
now = time.time()
last = self._last_event_time.get(entity_id, 0)
if (now - last) < self._cooldown_seconds:
return
self._last_event_time[entity_id] = now
# Build human-readable message
old_state = event_data.get("old_state", {})
@ -298,6 +300,9 @@ class HomeAssistantAdapter(BasePlatformAdapter):
if not message:
return
# Only consume the cooldown window for events we actually forward.
self._last_event_time[entity_id] = now
# Build MessageEvent and forward to handler
source = self.build_source(
chat_id="ha_events",

View file

@ -415,6 +415,115 @@ class TestCooldown:
assert adapter.handle_message.call_count == 5
# ---------------------------------------------------------------------------
# Regression coverage: #12062 — no-op events must not consume cooldown.
#
# The cooldown timestamp is meant to throttle *forwarded* messages. Events
# that the adapter decides not to forward (state didn't actually change,
# new_state missing, etc.) must leave the cooldown window untouched so the
# next genuine change still reaches handle_message.
# ---------------------------------------------------------------------------
class TestCooldownIssue12062:
@pytest.mark.asyncio
async def test_no_op_state_change_does_not_consume_cooldown(self):
"""Reporter's repro: no-op followed by a real change within the
cooldown window must still forward the real change."""
adapter = _make_adapter(watch_all=True, cooldown_seconds=60)
# First event: old_state == new_state — adapter must not forward,
# and must not consume the cooldown window.
await adapter._handle_ha_event(
_make_event("sensor.temp", "20", "20",
new_attrs={"friendly_name": "Temp"})
)
adapter.handle_message.assert_not_called()
# Second event: real change, fired immediately. Under the bug the
# no-op had already written _last_event_time, so this gets dropped.
await adapter._handle_ha_event(
_make_event("sensor.temp", "20", "21",
new_attrs={"friendly_name": "Temp"})
)
assert adapter.handle_message.await_count == 1
@pytest.mark.asyncio
async def test_no_op_does_not_write_last_event_time(self):
"""Direct structural pin: the cooldown map must be empty after
a no-op event so a future refactor can't reintroduce the leak."""
adapter = _make_adapter(watch_all=True, cooldown_seconds=60)
await adapter._handle_ha_event(
_make_event("sensor.temp", "on", "on",
new_attrs={"friendly_name": "Temp"})
)
assert adapter.handle_message.await_count == 0
assert "sensor.temp" not in adapter._last_event_time
@pytest.mark.asyncio
async def test_missing_new_state_does_not_consume_cooldown(self):
"""Malformed event (no new_state payload) must also not block the
next real change."""
adapter = _make_adapter(watch_all=True, cooldown_seconds=60)
# _format_state_change returns None for empty new_state too;
# cooldown must not be consumed in that branch either.
await adapter._handle_ha_event({
"data": {
"entity_id": "sensor.temp",
"old_state": {"state": "20", "attributes": {}},
"new_state": {}, # empty → formatter returns None
}
})
adapter.handle_message.assert_not_called()
assert "sensor.temp" not in adapter._last_event_time
await adapter._handle_ha_event(
_make_event("sensor.temp", "20", "21",
new_attrs={"friendly_name": "Temp"})
)
assert adapter.handle_message.await_count == 1
@pytest.mark.asyncio
async def test_forwarded_event_still_consumes_cooldown(self):
"""Preserved behaviour canary: once an event *is* forwarded, the
cooldown window must close for subsequent rapid events."""
adapter = _make_adapter(watch_all=True, cooldown_seconds=60)
await adapter._handle_ha_event(
_make_event("sensor.temp", "20", "21",
new_attrs={"friendly_name": "Temp"})
)
assert adapter.handle_message.await_count == 1
assert "sensor.temp" in adapter._last_event_time
# Rapid follow-up real change — cooldown blocks it (unchanged).
await adapter._handle_ha_event(
_make_event("sensor.temp", "21", "22",
new_attrs={"friendly_name": "Temp"})
)
assert adapter.handle_message.await_count == 1
@pytest.mark.asyncio
async def test_no_op_then_real_change_across_entities(self):
"""Regression: a no-op for entity A must not affect entity B's
independent cooldown accounting."""
adapter = _make_adapter(watch_all=True, cooldown_seconds=60)
await adapter._handle_ha_event(
_make_event("sensor.a", "1", "1", new_attrs={"friendly_name": "A"})
)
await adapter._handle_ha_event(
_make_event("sensor.b", "3", "4", new_attrs={"friendly_name": "B"})
)
# A was a no-op (not forwarded), B is a real change (forwarded once)
assert adapter.handle_message.await_count == 1
assert "sensor.a" not in adapter._last_event_time
assert "sensor.b" in adapter._last_event_time
# ---------------------------------------------------------------------------
# Config integration (env overrides, round-trip)
# ---------------------------------------------------------------------------