fix(gateway): replace os.environ session state with contextvars for concurrency safety

When two gateway messages arrived concurrently, _set_session_env wrote
HERMES_SESSION_PLATFORM/CHAT_ID/CHAT_NAME/THREAD_ID into the process-global
os.environ. Because asyncio tasks share the same process, Message B would
overwrite Message A's values mid-flight, causing background-task notifications
and tool calls to route to the wrong thread/chat.

Replace os.environ with Python's contextvars.ContextVar. Each asyncio task
(and any run_in_executor thread it spawns) gets its own copy, so concurrent
messages never interfere.

Changes:
- New gateway/session_context.py with ContextVar definitions, set/clear/get
  helpers, and os.environ fallback for CLI/cron/test backward compatibility
- gateway/run.py: _set_session_env returns reset tokens, _clear_session_env
  accepts them for proper cleanup in finally blocks
- All tool consumers updated: cronjob_tools, send_message_tool, skills_tool,
  terminal_tool (both notify_on_complete AND check_interval blocks), tts_tool,
  agent/skill_utils, agent/prompt_builder
- Tests updated for new contextvar-based API

Fixes #7358

Co-authored-by: teknium1 <127238744+teknium1@users.noreply.github.com>
This commit is contained in:
0xFrank-eth 2026-04-10 16:50:56 -07:00 committed by Teknium
parent dab5ec8245
commit e8034e2f6a
10 changed files with 255 additions and 52 deletions

View file

@ -3,9 +3,15 @@ import os
from gateway.config import Platform
from gateway.run import GatewayRunner
from gateway.session import SessionContext, SessionSource
from gateway.session_context import (
get_session_env,
set_session_vars,
clear_session_vars,
)
def test_set_session_env_includes_thread_id(monkeypatch):
def test_set_session_env_sets_contextvars(monkeypatch):
"""_set_session_env should populate contextvars, not os.environ."""
runner = object.__new__(GatewayRunner)
source = SessionSource(
platform=Platform.TELEGRAM,
@ -21,25 +27,93 @@ def test_set_session_env_includes_thread_id(monkeypatch):
monkeypatch.delenv("HERMES_SESSION_CHAT_NAME", raising=False)
monkeypatch.delenv("HERMES_SESSION_THREAD_ID", raising=False)
runner._set_session_env(context)
tokens = runner._set_session_env(context)
assert os.getenv("HERMES_SESSION_PLATFORM") == "telegram"
assert os.getenv("HERMES_SESSION_CHAT_ID") == "-1001"
assert os.getenv("HERMES_SESSION_CHAT_NAME") == "Group"
assert os.getenv("HERMES_SESSION_THREAD_ID") == "17585"
# Values should be readable via get_session_env (contextvar path)
assert get_session_env("HERMES_SESSION_PLATFORM") == "telegram"
assert get_session_env("HERMES_SESSION_CHAT_ID") == "-1001"
assert get_session_env("HERMES_SESSION_CHAT_NAME") == "Group"
assert get_session_env("HERMES_SESSION_THREAD_ID") == "17585"
# os.environ should NOT be touched
assert os.getenv("HERMES_SESSION_PLATFORM") is None
assert os.getenv("HERMES_SESSION_THREAD_ID") is None
# Clean up
runner._clear_session_env(tokens)
def test_clear_session_env_removes_thread_id(monkeypatch):
def test_clear_session_env_restores_previous_state(monkeypatch):
"""_clear_session_env should restore contextvars to their pre-handler values."""
runner = object.__new__(GatewayRunner)
monkeypatch.setenv("HERMES_SESSION_PLATFORM", "telegram")
monkeypatch.setenv("HERMES_SESSION_CHAT_ID", "-1001")
monkeypatch.setenv("HERMES_SESSION_CHAT_NAME", "Group")
monkeypatch.setenv("HERMES_SESSION_THREAD_ID", "17585")
monkeypatch.delenv("HERMES_SESSION_PLATFORM", raising=False)
monkeypatch.delenv("HERMES_SESSION_CHAT_ID", raising=False)
monkeypatch.delenv("HERMES_SESSION_CHAT_NAME", raising=False)
monkeypatch.delenv("HERMES_SESSION_THREAD_ID", raising=False)
runner._clear_session_env()
source = SessionSource(
platform=Platform.TELEGRAM,
chat_id="-1001",
chat_name="Group",
chat_type="group",
thread_id="17585",
)
context = SessionContext(source=source, connected_platforms=[], home_channels={})
assert os.getenv("HERMES_SESSION_PLATFORM") is None
assert os.getenv("HERMES_SESSION_CHAT_ID") is None
assert os.getenv("HERMES_SESSION_CHAT_NAME") is None
assert os.getenv("HERMES_SESSION_THREAD_ID") is None
tokens = runner._set_session_env(context)
assert get_session_env("HERMES_SESSION_PLATFORM") == "telegram"
runner._clear_session_env(tokens)
# After clear, contextvars should return to defaults (empty)
assert get_session_env("HERMES_SESSION_PLATFORM") == ""
assert get_session_env("HERMES_SESSION_CHAT_ID") == ""
assert get_session_env("HERMES_SESSION_CHAT_NAME") == ""
assert get_session_env("HERMES_SESSION_THREAD_ID") == ""
def test_get_session_env_falls_back_to_os_environ(monkeypatch):
"""get_session_env should fall back to os.environ when contextvar is unset."""
monkeypatch.setenv("HERMES_SESSION_PLATFORM", "discord")
# No contextvar set — should read from os.environ
assert get_session_env("HERMES_SESSION_PLATFORM") == "discord"
# Now set a contextvar — should prefer it
tokens = set_session_vars(platform="telegram")
assert get_session_env("HERMES_SESSION_PLATFORM") == "telegram"
# Restore — should fall back to os.environ again
clear_session_vars(tokens)
assert get_session_env("HERMES_SESSION_PLATFORM") == "discord"
def test_get_session_env_default_when_nothing_set(monkeypatch):
"""get_session_env returns default when neither contextvar nor env is set."""
monkeypatch.delenv("HERMES_SESSION_PLATFORM", raising=False)
assert get_session_env("HERMES_SESSION_PLATFORM") == ""
assert get_session_env("HERMES_SESSION_PLATFORM", "fallback") == "fallback"
def test_set_session_env_handles_missing_optional_fields():
"""_set_session_env should handle None chat_name and thread_id gracefully."""
runner = object.__new__(GatewayRunner)
source = SessionSource(
platform=Platform.TELEGRAM,
chat_id="-1001",
chat_name=None,
chat_type="private",
thread_id=None,
)
context = SessionContext(source=source, connected_platforms=[], home_channels={})
tokens = runner._set_session_env(context)
assert get_session_env("HERMES_SESSION_PLATFORM") == "telegram"
assert get_session_env("HERMES_SESSION_CHAT_ID") == "-1001"
assert get_session_env("HERMES_SESSION_CHAT_NAME") == ""
assert get_session_env("HERMES_SESSION_THREAD_ID") == ""
runner._clear_session_env(tokens)