fix(dashboard): use await-safe config-only scope for /api/status profile

_profile_scope swaps process-global skills_tool/skill_manager module
attrs under an RLock; /api/status holds that scope across the
run_in_executor remote-health probe await, so a concurrent
/api/skills?profile=X request can cross-restore the status profile's
skill dir on its finally. Add _config_profile_scope (contextvar-only,
task-local, await-safe) and use it for status, which only resolves
get_hermes_home() at call time for config/env/gateway state and never
needs the skills-module globals.
This commit is contained in:
teknium1 2026-06-17 05:09:20 -07:00 committed by Teknium
parent 674e8b098a
commit dc86d48a3e

View file

@ -1620,8 +1620,14 @@ async def get_status(profile: Optional[str] = None):
# Plain /api/status stays the machine-level public liveness probe. The
# dashboard adds ?profile= when its management switcher targets another
# profile, so its gateway badge reflects the selected profile.
#
# Use the config-only (contextvar) scope, NOT _profile_scope: this handler
# awaits the remote-health probe, and _profile_scope swaps process-global
# skills-module attributes that a concurrent request would cross-restore
# across that await. Status only resolves get_hermes_home() at call time
# (config/env/gateway state), which the task-local contextvar covers.
if requested_profile and requested_profile.lower() != "current":
status_scope = _profile_scope(requested_profile)
status_scope = _config_profile_scope(requested_profile)
status_scope.__enter__()
try:
@ -9498,6 +9504,40 @@ def _profile_scope(profile: Optional[str]):
reset_hermes_home_override(token)
@contextmanager
def _config_profile_scope(profile: Optional[str]):
"""Await-safe, config-only profile scope for handlers that ``await``.
Unlike ``_profile_scope`` this touches ONLY the context-local
``set_hermes_home_override`` contextvar it does NOT swap the
process-global ``skills_tool``/``skill_manager`` module attributes.
Those globals are shared across all event-loop tasks, so holding them
across an ``await`` lets a concurrent skills request restore THIS
request's profile dir on its ``finally`` (cross-contamination). The
contextvar override is task-local and survives an ``await`` cleanly,
which is all endpoints that resolve ``get_hermes_home()`` at call time
(config, env, gateway status) actually need.
None/""/"current" means the dashboard's own profile — no override.
"""
requested = (profile or "").strip()
if not requested or requested.lower() == "current":
yield None
return
from hermes_constants import (
set_hermes_home_override,
reset_hermes_home_override,
)
profile_dir = _resolve_profile_dir(requested)
token = set_hermes_home_override(str(profile_dir))
try:
yield profile_dir
finally:
reset_hermes_home_override(token)
class SkillToggle(BaseModel):
name: str
enabled: bool