fix(managed-scope): honor managed scope in all standalone config loaders

The skin bug was one instance of a class: several subsystems build their
config dict directly from config.yaml instead of routing through
hermes_cli.config.load_config (which carries the managed merge), so they
silently ignored administrator-pinned values. Audited every config.yaml
reader and fixed the behavioral-read bypasses:

- gateway/config.py load_gateway_config (messaging gateway: session_reset,
  quick_commands, stt, model, ...)
- gateway/run.py _load_gateway_config (its read_raw_config fast path also
  skipped the merge — read_raw_config returns raw user YAML)
- tui_gateway/server.py _load_cfg (new TUI + desktop backend: skin,
  reasoning_effort, service_tier, provider_routing)
- cron/scheduler.py (scheduled-job model/reasoning/toolsets/provider_routing)
- hermes_logging.py (logging.level/max_size_mb/backup_count)
- hermes_time.py (timezone)
- hermes_cli/doctor.py (memory-provider diagnostic reads effective config)

All route through a new shared managed_scope.apply_managed_overlay() helper
that mirrors _load_config_impl (env-only expansion so a user ${VAR} can't
shadow a managed literal, root-model-string normalization, leaf-merge) and is
fail-open. cli.py's earlier inline fix is refactored onto the same helper.

Write-back paths (slash_commands, telegram/yuanbao dm_topics, profile
distribution) are deliberately left reading raw user YAML — overlaying managed
values there would persist them into the user file. The dashboard
(web_server.py) already routes through load_config and needed no change.

TUI loader caches the RAW config so _save_cfg never writes managed values to
disk. Adds test_managed_scope_overlay.py (helper) and
test_managed_scope_loaders.py (per-surface integration); mutation-checked.
This commit is contained in:
Ben 2026-06-19 13:01:48 +10:00 committed by Teknium
parent 732293cf87
commit b0e47a98f9
11 changed files with 314 additions and 23 deletions

19
cli.py
View file

@ -567,21 +567,12 @@ def load_cli_config() -> Dict[str, Any]:
# hermes_cli.config._load_config_impl (which has its own managed merge), so
# without this the entire interactive CLI/TUI surface — skin, display prefs,
# etc. read from CLI_CONFIG — would silently ignore managed scope while
# `hermes config`/`doctor`/guards (which use load_config) honor it. Mirror
# _load_config_impl: expand managed against the process env only (so a user
# ${VAR} can't shadow a managed literal), normalize its root model key so a
# managed `model: x/y` string can't clobber the dict shape callers expect,
# then leaf-merge on top. Fail-open — managed scope must never block startup.
try:
from hermes_cli import managed_scope
from hermes_cli.config import _deep_merge, _normalize_root_model_keys
# `hermes config`/`doctor`/guards (which use load_config) honor it. The
# shared helper mirrors _load_config_impl (env-only expansion, root-model
# normalization, leaf-merge) and is fail-open.
from hermes_cli import managed_scope
managed_config = managed_scope.load_managed_config()
if managed_config:
managed_expanded = _normalize_root_model_keys(_expand_env_vars(managed_config))
defaults = _deep_merge(defaults, managed_expanded)
except Exception as e: # noqa: BLE001 — never let managed scope break CLI startup
logger.warning("Failed to apply managed scope to CLI config: %s", e)
defaults = managed_scope.apply_managed_overlay(defaults)
# Apply terminal config to environment variables (so terminal_tool picks them up)
terminal_config = defaults.get("terminal", {})

View file

@ -1641,6 +1641,15 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
if os.path.exists(_cfg_path):
with open(_cfg_path, encoding="utf-8") as _f:
_cfg = yaml.safe_load(_f) or {}
# Managed scope: a scheduled job must honor administrator-pinned
# model / reasoning / toolsets / provider_routing too. This loader
# builds its own dict, so overlay managed values via the shared
# helper (fail-open, no-op when no managed scope).
try:
from hermes_cli import managed_scope
_cfg = managed_scope.apply_managed_overlay(_cfg)
except Exception:
pass
_cfg = _expand_env_vars(_cfg)
_model_cfg = _cfg.get("model", {})
if not job.get("model"):

View file

@ -810,6 +810,14 @@ def load_gateway_config() -> GatewayConfig:
with open(config_yaml_path, encoding="utf-8") as f:
yaml_cfg = yaml.safe_load(f) or {}
# Managed scope: overlay administrator-pinned values so the gateway
# honors them too. This loader builds its own dict instead of going
# through hermes_cli.config.load_config, so without this a managed
# session_reset / quick_commands / stt / model would be ignored by
# the messaging gateway. Fail-open via the shared helper.
from hermes_cli import managed_scope
yaml_cfg = managed_scope.apply_managed_overlay(yaml_cfg)
# Map config.yaml keys → GatewayConfig.from_dict() schema.
# Each key overwrites whatever gateway.json may have set.
sr = yaml_cfg.get("session_reset")

View file

@ -1985,8 +1985,14 @@ def _load_gateway_config() -> dict:
Uses the module-level ``_hermes_home`` (so tests that monkeypatch it
still see their fixture) and shares the mtime-keyed raw-yaml cache
from ``hermes_cli.config.read_raw_config`` when the paths match.
Managed scope is overlaid on the result (via the shared helper) so the
gateway honors administrator-pinned values neither read_raw_config nor a
direct yaml.safe_load carries the managed merge on its own. Fail-open.
"""
config_path = _hermes_home / 'config.yaml'
raw: dict = {}
used_canonical = False
try:
from hermes_cli.config import get_config_path, read_raw_config
# Fast path: if _hermes_home agrees with the canonical config
@ -1994,18 +2000,31 @@ def _load_gateway_config() -> dict:
# direct read (keeps test fixtures with a monkeypatched
# _hermes_home working).
if config_path == get_config_path():
return read_raw_config()
raw = read_raw_config()
used_canonical = True
except Exception:
pass
if not used_canonical:
try:
if config_path.exists():
import yaml
with open(config_path, 'r', encoding='utf-8') as f:
raw = yaml.safe_load(f) or {}
except Exception:
logger.debug("Could not load gateway config from %s", config_path)
raw = {}
# Overlay managed scope. read_raw_config() returns the user's raw YAML
# WITHOUT the managed merge (that lives in load_config/_load_config_impl),
# so the overlay is required on both paths for the gateway to honor pinned
# values. Helper is fail-open and a no-op when no managed scope exists.
try:
if config_path.exists():
import yaml
with open(config_path, 'r', encoding='utf-8') as f:
return yaml.safe_load(f) or {}
from hermes_cli import managed_scope
raw = managed_scope.apply_managed_overlay(raw if isinstance(raw, dict) else {})
except Exception:
logger.debug("Could not load gateway config from %s", config_path)
return {}
pass
return raw if isinstance(raw, dict) else {}
def _load_gateway_runtime_config() -> dict:

View file

@ -2179,6 +2179,11 @@ def run_doctor(args):
if _mem_cfg_path.exists():
with open(_mem_cfg_path, encoding="utf-8") as _f:
_raw_cfg = _yaml.safe_load(_f) or {}
try:
from hermes_cli import managed_scope
_raw_cfg = managed_scope.apply_managed_overlay(_raw_cfg)
except Exception:
pass
_active_memory_provider = (_raw_cfg.get("memory") or {}).get("provider", "")
except Exception:
pass

View file

@ -134,6 +134,49 @@ def load_managed_env() -> Dict[str, str]:
return parsed if isinstance(parsed, dict) else {}
def apply_managed_overlay(config: dict) -> dict:
"""Overlay administrator-pinned config values on top of an already-built dict.
The single, shared way for any config loader that builds its own dict
(rather than going through hermes_cli.config.load_config) to honor managed
scope. Mirrors hermes_cli.config._load_config_impl's managed merge exactly:
* expand the managed config's ``${VAR}`` refs against the PROCESS env only
(never user-config-defined refs), so a user cannot shadow a managed
literal via a ${VAR} they control;
* normalize the managed config's root ``model`` key (a bare ``model: x/y``
string is promoted to ``model.default``) so it can't clobber the dict
shape callers expect;
* leaf-level deep-merge managed ON TOP, so managed wins per-leaf while
sibling keys stay user-controlled.
Fail-open: returns ``config`` unchanged if no managed scope is present or on
any error managed scope must never break a caller's startup. Mutates and
returns ``config`` (callers pass a dict they own).
"""
try:
managed = load_managed_config()
if not managed:
return config
# Imported lazily to avoid an import cycle (config imports managed_scope).
from hermes_cli.config import _deep_merge, _expand_env_vars, _normalize_root_model_keys
managed_expanded = _normalize_root_model_keys(_expand_env_vars(managed))
# A bare ``model: x/y`` string in the managed file must merge as
# ``model.default`` — otherwise _deep_merge would replace the caller's
# ``model`` dict with a string and break every ``cfg["model"]["..."]``
# read. _normalize_root_model_keys only promotes the string when there
# are root provider/base_url keys to migrate, so handle the bare case
# here (matches cli.py's own string-model handling).
if isinstance(managed_expanded.get("model"), str):
managed_expanded = dict(managed_expanded)
managed_expanded["model"] = {"default": managed_expanded["model"]}
return _deep_merge(config, managed_expanded)
except Exception: # noqa: BLE001 — overlay must never break a caller
logger.warning("managed scope: failed to apply config overlay", exc_info=True)
return config
def _parse_env(f) -> Dict[str, str]:
out: Dict[str, str] = {}
for line in f:

View file

@ -553,6 +553,13 @@ def _read_logging_config():
if config_path.exists():
with open(config_path, "r", encoding="utf-8") as f:
cfg = yaml.safe_load(f) or {}
# Managed scope: an administrator can pin logging.* too. Overlay via
# the shared helper (fail-open) since this reads config.yaml directly.
try:
from hermes_cli import managed_scope
cfg = managed_scope.apply_managed_overlay(cfg)
except Exception:
pass
log_cfg = cfg.get("logging", {})
if isinstance(log_cfg, dict):
return (

View file

@ -52,6 +52,13 @@ def _resolve_timezone_name() -> str:
if config_path.exists():
with open(config_path, encoding="utf-8") as f:
cfg = yaml.safe_load(f) or {}
# Managed scope: an administrator can pin ``timezone`` too. Overlay
# via the shared helper (fail-open) since this reads config.yaml directly.
try:
from hermes_cli import managed_scope
cfg = managed_scope.apply_managed_overlay(cfg)
except Exception:
pass
tz_cfg = cfg.get("timezone", "")
if isinstance(tz_cfg, str) and tz_cfg.strip():
return tz_cfg.strip()

View file

@ -0,0 +1,113 @@
"""Each standalone config loader (gateway, TUI/desktop, cron) must honor managed scope.
These loaders build their own config dict instead of routing through
hermes_cli.config.load_config, so the managed overlay has to be wired into each.
This is the regression guard for the whole bug class (a managed display.skin was
silently ignored by the TUI; the same gap existed in the gateway and cron).
"""
import textwrap
import pytest
@pytest.fixture
def homes(tmp_path, monkeypatch):
home = tmp_path / "home"
home.mkdir()
managed = tmp_path / "managed"
managed.mkdir()
monkeypatch.setenv("HERMES_HOME", str(home))
monkeypatch.setenv("HERMES_MANAGED_DIR", str(managed))
import hermes_cli.config as cfg
from hermes_cli import managed_scope
cfg._LOAD_CONFIG_CACHE.clear()
cfg._RAW_CONFIG_CACHE.clear()
managed_scope.invalidate_managed_cache()
return home, managed
def _seed(home, managed, *, user, mgd):
(home / "config.yaml").write_text(textwrap.dedent(user), encoding="utf-8")
(managed / "config.yaml").write_text(textwrap.dedent(mgd), encoding="utf-8")
import hermes_cli.config as cfg
from hermes_cli import managed_scope
cfg._LOAD_CONFIG_CACHE.clear()
cfg._RAW_CONFIG_CACHE.clear()
managed_scope.invalidate_managed_cache()
def test_gateway_run_loader_honors_managed(homes, monkeypatch):
home, managed = homes
_seed(home, managed, user="model:\n default: user/m\n", mgd="model:\n default: org/m\n")
import gateway.run as gr
monkeypatch.setattr(gr, "_hermes_home", home, raising=False)
cfg = gr._load_gateway_config()
assert (cfg.get("model") or {}).get("default") == "org/m"
def test_gateway_config_loader_honors_managed(homes, monkeypatch):
home, managed = homes
_seed(
home,
managed,
user="group_sessions_per_user: false\n",
mgd="group_sessions_per_user: true\n",
)
import gateway.config as gc
# load_gateway_config resolves home via get_hermes_home() (HERMES_HOME env).
cfg = gc.load_gateway_config()
# Managed value should have flowed into the GatewayConfig.
assert cfg.group_sessions_per_user is True
def test_tui_loader_honors_managed(homes, monkeypatch):
home, managed = homes
_seed(home, managed, user="display:\n skin: user\n", mgd="display:\n skin: charizard\n")
import tui_gateway.server as ts
monkeypatch.setattr(ts, "_hermes_home", home, raising=False)
monkeypatch.setattr(ts, "_cfg_cache", None, raising=False)
monkeypatch.setattr(ts, "_cfg_mtime", None, raising=False)
monkeypatch.setattr(ts, "get_hermes_home_override", lambda: None, raising=False)
cfg = ts._load_cfg()
assert (cfg.get("display") or {}).get("skin") == "charizard"
def test_tui_loader_does_not_persist_managed_back(homes, monkeypatch):
"""The TUI caches RAW config so _save_cfg never writes managed values to disk."""
home, managed = homes
_seed(home, managed, user="display:\n skin: user\n", mgd="display:\n skin: charizard\n")
import tui_gateway.server as ts
monkeypatch.setattr(ts, "_hermes_home", home, raising=False)
monkeypatch.setattr(ts, "_cfg_cache", None, raising=False)
monkeypatch.setattr(ts, "_cfg_mtime", None, raising=False)
monkeypatch.setattr(ts, "get_hermes_home_override", lambda: None, raising=False)
ts._load_cfg() # populates the cache
# The cache must hold the RAW user value, not the managed overlay, so a
# subsequent _save_cfg can't bake the managed skin into the user file.
assert (ts._cfg_cache.get("display") or {}).get("skin") == "user"
def test_logging_config_honors_managed(homes, monkeypatch):
home, managed = homes
_seed(home, managed, user="logging:\n level: INFO\n", mgd="logging:\n level: DEBUG\n")
import hermes_logging
level, _max, _bk = hermes_logging._read_logging_config()
assert level == "DEBUG"
def test_timezone_honors_managed(homes, monkeypatch):
home, managed = homes
# hermes_time checks an env override first; ensure it's unset so config wins.
monkeypatch.delenv("HERMES_TIMEZONE", raising=False)
monkeypatch.delenv("TZ", raising=False)
_seed(home, managed, user="timezone: America/New_York\n", mgd="timezone: Asia/Tokyo\n")
import hermes_time
assert hermes_time._resolve_timezone_name() == "Asia/Tokyo"

View file

@ -0,0 +1,69 @@
"""apply_managed_overlay() — the shared helper used by every standalone loader."""
import textwrap
import pytest
@pytest.fixture
def managed(tmp_path, monkeypatch):
md = tmp_path / "managed"
md.mkdir()
monkeypatch.setenv("HERMES_MANAGED_DIR", str(md))
from hermes_cli import managed_scope
managed_scope.invalidate_managed_cache()
return md
def _write(md, body):
(md / "config.yaml").write_text(textwrap.dedent(body), encoding="utf-8")
from hermes_cli import managed_scope
managed_scope.invalidate_managed_cache()
def test_overlay_noop_without_scope(tmp_path, monkeypatch):
from hermes_cli import managed_scope
monkeypatch.setenv("HERMES_MANAGED_DIR", str(tmp_path / "nope"))
managed_scope.invalidate_managed_cache()
src = {"display": {"skin": "user"}}
assert managed_scope.apply_managed_overlay(src) == {"display": {"skin": "user"}}
def test_overlay_managed_wins(managed):
from hermes_cli import managed_scope
_write(managed, "display:\n skin: charizard\n")
out = managed_scope.apply_managed_overlay({"display": {"skin": "user"}})
assert out["display"]["skin"] == "charizard"
def test_overlay_preserves_user_siblings(managed):
from hermes_cli import managed_scope
_write(managed, "display:\n skin: charizard\n")
out = managed_scope.apply_managed_overlay(
{"display": {"skin": "user", "show_reasoning": True}}
)
assert out["display"]["skin"] == "charizard"
assert out["display"]["show_reasoning"] is True
def test_overlay_normalizes_root_model_string(managed):
"""A managed bare `model: x/y` must promote to model.default, not clobber the dict."""
from hermes_cli import managed_scope
_write(managed, "model: org/locked\n")
out = managed_scope.apply_managed_overlay({"model": {"default": "user/m", "fallback": "u/fb"}})
assert out["model"]["default"] == "org/locked" # managed wins
assert out["model"]["fallback"] == "u/fb" # user sibling preserved (dict shape intact)
def test_overlay_user_envref_cannot_shadow_managed_literal(managed, monkeypatch):
from hermes_cli import managed_scope
monkeypatch.setenv("EVIL", "user/override")
_write(managed, "model:\n default: managed/locked\n")
out = managed_scope.apply_managed_overlay({"model": {"default": "${EVIL}"}})
assert out["model"]["default"] == "managed/locked"

View file

@ -1339,22 +1339,42 @@ def _load_cfg() -> dict:
mtime = p.stat().st_mtime if p.exists() else None
with _cfg_lock:
if _cfg_cache is not None and _cfg_mtime == mtime and _cfg_path == p:
return copy.deepcopy(_cfg_cache)
return _apply_managed(copy.deepcopy(_cfg_cache))
if p.exists():
with open(p, encoding="utf-8") as f:
data = yaml.safe_load(f) or {}
else:
data = {}
with _cfg_lock:
# Cache the RAW user config (no managed overlay) so _save_cfg, which
# writes _cfg_cache back to disk, never persists managed values into
# the user's file. The managed overlay is applied on every return
# path instead (read-side only).
_cfg_cache = copy.deepcopy(data)
_cfg_mtime = mtime
_cfg_path = p
return data
return _apply_managed(data)
except Exception:
pass
return {}
def _apply_managed(cfg: dict) -> dict:
"""Overlay administrator-pinned managed-scope values on a config dict.
The TUI/desktop backend builds config independently of
hermes_cli.config.load_config, so without this a managed skin / reasoning_effort
/ service_tier / provider_routing would be silently ignored here. Read-side
only the raw user config is what gets cached and saved. Fail-open.
"""
try:
from hermes_cli import managed_scope
return managed_scope.apply_managed_overlay(cfg if isinstance(cfg, dict) else {})
except Exception:
return cfg
def _save_cfg(cfg: dict):
global _cfg_cache, _cfg_mtime, _cfg_path
import yaml