diff --git a/gateway/run.py b/gateway/run.py index 03c1e3d8f8..4c82a9274b 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -638,6 +638,7 @@ class GatewayRunner: _restart_via_service: bool = False _stop_task: Optional[asyncio.Task] = None _session_model_overrides: Dict[str, Dict[str, str]] = {} + _session_reasoning_overrides: Dict[str, Dict[str, Any]] = {} def __init__(self, config: Optional[GatewayConfig] = None): self.config = config or load_gateway_config() @@ -701,6 +702,9 @@ class GatewayRunner: # Per-session model overrides from /model command. # Key: session_key, Value: dict with model/provider/api_key/base_url/api_mode self._session_model_overrides: Dict[str, Dict[str, str]] = {} + # Per-session reasoning effort overrides from /reasoning. + # Key: session_key, Value: parsed reasoning config dict. + self._session_reasoning_overrides: Dict[str, Dict[str, Any]] = {} # Track pending exec approvals per session # Key: session_key, Value: {"command": str, "pattern_key": str, ...} self._pending_approvals: Dict[str, Dict[str, Any]] = {} @@ -1263,6 +1267,66 @@ class GatewayRunner: logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort) return result + @staticmethod + def _parse_reasoning_command_args(raw_args: str) -> tuple[str, bool]: + """Parse `/reasoning` args into `(value, persist_global)`. + + `/reasoning ` is session-scoped by default. `--global` may be + supplied in any position to persist the change to config.yaml. 
+ """ + import shlex + + text = str(raw_args or "").strip().replace("β€”", "--") + if not text: + return "", False + try: + tokens = shlex.split(text) + except ValueError: + tokens = text.split() + + persist_global = False + value_tokens = [] + for token in tokens: + if token == "--global": + persist_global = True + else: + value_tokens.append(token) + return " ".join(value_tokens).strip().lower(), persist_global + + def _resolve_session_reasoning_config( + self, + *, + source: Optional[SessionSource] = None, + session_key: Optional[str] = None, + ) -> dict | None: + """Resolve reasoning effort for a session, honoring session overrides.""" + resolved_session_key = session_key + if not resolved_session_key and source is not None: + try: + resolved_session_key = self._session_key_for_source(source) + except Exception: + resolved_session_key = None + + overrides = getattr(self, "_session_reasoning_overrides", {}) or {} + if resolved_session_key and resolved_session_key in overrides: + return overrides[resolved_session_key] + return self._load_reasoning_config() + + def _set_session_reasoning_override( + self, + session_key: str, + reasoning_config: Optional[dict], + ) -> None: + """Set or clear the session-scoped reasoning override.""" + if not session_key: + return + if not hasattr(self, "_session_reasoning_overrides"): + self._session_reasoning_overrides = {} + if reasoning_config is None: + self._session_reasoning_overrides.pop(session_key, None) + else: + self._session_reasoning_overrides[session_key] = dict(reasoning_config) + @staticmethod def _load_service_tier() -> str | None: """Load Priority Processing setting from config.yaml. 
@@ -3982,6 +4046,8 @@ class GatewayRunner: # Get or create session session_entry = self.session_store.get_or_create_session(source) session_key = session_entry.session_key + if getattr(session_entry, "was_auto_reset", False): + self._set_session_reasoning_override(session_key, None) # Emit session:start for new or auto-reset sessions _is_new_session = ( @@ -4652,6 +4718,7 @@ class GatewayRunner: self.session_store.reset_session(session_key) self._evict_cached_agent(session_key) self._session_model_overrides.pop(session_key, None) + self._set_session_reasoning_override(session_key, None) response = (response or "") + ( "\n\nπŸ”„ Session auto-reset β€” the conversation exceeded the " "maximum context size and could not be compressed further. " @@ -4928,9 +4995,10 @@ class GatewayRunner: # Reset the session new_entry = self.session_store.reset_session(session_key) - # Clear any session-scoped model override so the next agent picks up - # the configured default instead of the previously switched model. + # Clear any session-scoped model/reasoning overrides so the next agent + # picks up configured defaults instead of previous session switches. self._session_model_overrides.pop(session_key, None) + self._set_session_reasoning_override(session_key, None) # Clear session-scoped dangerous-command approvals and /yolo state. 
# /new is a conversation-boundary operation β€” approval state from the @@ -6417,7 +6485,7 @@ class GatewayRunner: pr = self._provider_routing max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90")) - reasoning_config = self._load_reasoning_config() + reasoning_config = self._resolve_session_reasoning_config(source=source) self._reasoning_config = reasoning_config self._service_tier = self._load_service_tier() turn_route = self._resolve_turn_agent_config(prompt, model, runtime_kwargs) @@ -6590,7 +6658,10 @@ class GatewayRunner: return platform_key = _platform_config_key(source.platform) - reasoning_config = self._load_reasoning_config() + reasoning_config = self._resolve_session_reasoning_config( + source=source, + session_key=session_key, + ) self._service_tier = self._load_service_tier() turn_route = self._resolve_turn_agent_config(question, model, runtime_kwargs) pr = self._provider_routing @@ -6696,17 +6767,24 @@ class GatewayRunner: """Handle /reasoning command β€” manage reasoning effort and display toggle. 
Usage: - /reasoning Show current effort level and display state - /reasoning Set reasoning effort (none, minimal, low, medium, high, xhigh) - /reasoning show|on Show model reasoning in responses - /reasoning hide|off Hide model reasoning from responses + /reasoning Show current effort level and display state + /reasoning Set reasoning effort for this session only + /reasoning --global Persist reasoning effort to config.yaml + /reasoning reset Clear this session's reasoning override + /reasoning show|on Show model reasoning in responses + /reasoning hide|off Hide model reasoning from responses """ import yaml - args = event.get_command_args().strip().lower() + raw_args = event.get_command_args().strip() + args, persist_global = self._parse_reasoning_command_args(raw_args) config_path = _hermes_home / "config.yaml" - self._reasoning_config = self._load_reasoning_config() + session_key = self._session_key_for_source(event.source) self._show_reasoning = self._load_show_reasoning() + self._reasoning_config = self._resolve_session_reasoning_config( + source=event.source, + session_key=session_key, + ) def _save_config_key(key_path: str, value): """Save a dot-separated key to config.yaml.""" @@ -6728,7 +6806,7 @@ class GatewayRunner: logger.error("Failed to save config key %s: %s", key_path, e) return False - if not args: + if not raw_args: # Show current state rc = self._reasoning_config if rc is None: @@ -6738,11 +6816,14 @@ class GatewayRunner: else: level = rc.get("effort", "medium") display_state = "on βœ“" if self._show_reasoning else "off" + has_session_override = session_key in (getattr(self, "_session_reasoning_overrides", {}) or {}) + scope = "session override" if has_session_override else "global config" return ( "🧠 **Reasoning Settings**\n\n" f"**Effort:** `{level}`\n" + f"**Scope:** {scope}\n" f"**Display:** {display_state}\n\n" - "_Usage:_ `/reasoning `" + "_Usage:_ `/reasoning [--global]`" ) # Display toggle (per-platform) @@ -6762,22 +6843,38 @@ class 
GatewayRunner: # Effort level change effort = args.strip() + if effort == "reset": + if persist_global: + return "⚠️ `/reasoning reset --global` is not supported. Use `/reasoning --global` to change the global default." + self._set_session_reasoning_override(session_key, None) + self._reasoning_config = self._load_reasoning_config() + self._evict_cached_agent(session_key) + return "🧠 βœ“ Session reasoning override cleared; falling back to global config." if effort == "none": parsed = {"enabled": False} elif effort in ("minimal", "low", "medium", "high", "xhigh"): parsed = {"enabled": True, "effort": effort} else: return ( - f"⚠️ Unknown argument: `{effort}`\n\n" + f"⚠️ Unknown argument: `{effort or raw_args.lower()}`\n\n" "**Valid levels:** none, minimal, low, medium, high, xhigh\n" - "**Display:** show, hide" + "**Display:** show, hide\n" + "**Persist:** add `--global` to save beyond this session" ) self._reasoning_config = parsed - if _save_config_key("agent.reasoning_effort", effort): - return f"🧠 βœ“ Reasoning effort set to `{effort}` (saved to config)\n_(takes effect on next message)_" - else: - return f"🧠 βœ“ Reasoning effort set to `{effort}` (this session only)" + if persist_global: + if _save_config_key("agent.reasoning_effort", effort): + self._set_session_reasoning_override(session_key, None) + self._evict_cached_agent(session_key) + return f"🧠 βœ“ Reasoning effort set to `{effort}` (saved to config)\n_(takes effect on next message)_" + self._set_session_reasoning_override(session_key, parsed) + self._evict_cached_agent(session_key) + return f"🧠 βœ“ Reasoning effort set to `{effort}` (session only β€” config save failed)\n_(takes effect on next message)_" + + self._set_session_reasoning_override(session_key, parsed) + self._evict_cached_agent(session_key) + return f"🧠 βœ“ Reasoning effort set to `{effort}` (session only β€” add `--global` to persist)\n_(takes effect on next message)_" async def _handle_fast_command(self, event: MessageEvent) -> str: 
"""Handle /fast β€” mirror the CLI Priority Processing toggle in gateway chats.""" @@ -9579,7 +9676,10 @@ class GatewayRunner: } pr = self._provider_routing - reasoning_config = self._load_reasoning_config() + reasoning_config = self._resolve_session_reasoning_config( + source=source, + session_key=session_key, + ) self._reasoning_config = reasoning_config self._service_tier = self._load_service_tier() # Set up stream consumer for token streaming or interim commentary. diff --git a/tests/gateway/test_reasoning_command.py b/tests/gateway/test_reasoning_command.py index e39ed1123d..5020df30a7 100644 --- a/tests/gateway/test_reasoning_command.py +++ b/tests/gateway/test_reasoning_command.py @@ -33,6 +33,7 @@ def _make_runner(): runner._ephemeral_system_prompt = "" runner._prefill_messages = [] runner._reasoning_config = None + runner._session_reasoning_overrides = {} runner._show_reasoning = False runner._provider_routing = {} runner._fallback_model = None @@ -76,6 +77,10 @@ class TestReasoningCommand: source = inspect.getsource(gateway_run.GatewayRunner._handle_message) assert '"reasoning"' in source + def test_parse_reasoning_command_args_accepts_ascii_and_smart_global_flags(self): + assert gateway_run.GatewayRunner._parse_reasoning_command_args("high --global") == ("high", True) + assert gateway_run.GatewayRunner._parse_reasoning_command_args("β€”global xhigh") == ("xhigh", True) + @pytest.mark.asyncio async def test_reasoning_command_reloads_current_state_from_config(self, tmp_path, monkeypatch): hermes_home = tmp_path / "hermes" @@ -111,13 +116,90 @@ class TestReasoningCommand: runner = _make_runner() runner._reasoning_config = {"enabled": True, "effort": "medium"} - result = await runner._handle_reasoning_command(_make_event("/reasoning low")) + result = await runner._handle_reasoning_command(_make_event("/reasoning low --global")) saved = yaml.safe_load(config_path.read_text(encoding="utf-8")) assert saved["agent"]["reasoning_effort"] == "low" assert 
@pytest.mark.asyncio
async def test_handle_reasoning_command_defaults_to_session_only(self, tmp_path, monkeypatch):
    """`/reasoning high` without a flag stores a session override and leaves config.yaml as is."""
    home_dir = tmp_path / "hermes"
    home_dir.mkdir()
    cfg_file = home_dir / "config.yaml"
    cfg_file.write_text("agent:\n reasoning_effort: medium\n", encoding="utf-8")
    monkeypatch.setattr(gateway_run, "_hermes_home", home_dir)

    runner = _make_runner()
    event = _make_event("/reasoning high")
    key = runner._session_key_for_source(event.source)

    reply = await runner._handle_reasoning_command(event)

    expected = {"enabled": True, "effort": "high"}
    on_disk = yaml.safe_load(cfg_file.read_text(encoding="utf-8"))
    assert on_disk["agent"]["reasoning_effort"] == "medium"
    assert runner._session_reasoning_overrides[key] == expected
    assert runner._reasoning_config == expected
    assert "session only" in reply


@pytest.mark.asyncio
async def test_reasoning_global_clears_existing_session_override(self, tmp_path, monkeypatch):
    """`/reasoning low --global` writes config.yaml and drops the session's override."""
    home_dir = tmp_path / "hermes"
    home_dir.mkdir()
    cfg_file = home_dir / "config.yaml"
    cfg_file.write_text("agent:\n reasoning_effort: medium\n", encoding="utf-8")
    monkeypatch.setattr(gateway_run, "_hermes_home", home_dir)

    runner = _make_runner()
    event = _make_event("/reasoning low --global")
    key = runner._session_key_for_source(event.source)
    # Pre-existing override that the global change is expected to remove.
    runner._session_reasoning_overrides[key] = {"enabled": True, "effort": "xhigh"}

    reply = await runner._handle_reasoning_command(event)

    on_disk = yaml.safe_load(cfg_file.read_text(encoding="utf-8"))
    assert on_disk["agent"]["reasoning_effort"] == "low"
    assert key not in runner._session_reasoning_overrides
    assert "saved to config" in reply


@pytest.mark.asyncio
async def test_reasoning_reset_clears_session_override_without_config_write(self, tmp_path, monkeypatch):
    """`/reasoning reset` removes the session override and does not touch config.yaml."""
    home_dir = tmp_path / "hermes"
    home_dir.mkdir()
    cfg_file = home_dir / "config.yaml"
    cfg_file.write_text("agent:\n reasoning_effort: medium\n", encoding="utf-8")
    monkeypatch.setattr(gateway_run, "_hermes_home", home_dir)

    runner = _make_runner()
    event = _make_event("/reasoning reset")
    key = runner._session_key_for_source(event.source)
    runner._session_reasoning_overrides[key] = {"enabled": True, "effort": "xhigh"}

    reply = await runner._handle_reasoning_command(event)

    on_disk = yaml.safe_load(cfg_file.read_text(encoding="utf-8"))
    assert on_disk["agent"]["reasoning_effort"] == "medium"
    assert key not in runner._session_reasoning_overrides
    assert "cleared" in reply


def test_resolve_session_reasoning_prefers_session_override(self, tmp_path, monkeypatch):
    """A stored session override wins over the value in config.yaml."""
    home_dir = tmp_path / "hermes"
    home_dir.mkdir()
    (home_dir / "config.yaml").write_text("agent:\n reasoning_effort: low\n", encoding="utf-8")
    monkeypatch.setattr(gateway_run, "_hermes_home", home_dir)

    runner = _make_runner()
    origin = _make_event("/reasoning").source
    key = runner._session_key_for_source(origin)
    runner._session_reasoning_overrides[key] = {"enabled": True, "effort": "xhigh"}

    resolved = runner._resolve_session_reasoning_config(source=origin)

    assert resolved == {"enabled": True, "effort": "xhigh"}


def test_run_agent_prefers_session_reasoning_override(self, tmp_path, monkeypatch):
    """`_run_agent` passes the session override, not the config.yaml value, to the agent."""
    home_dir = tmp_path / "hermes"
    home_dir.mkdir()
    (home_dir / "config.yaml").write_text("agent:\n reasoning_effort: low\n", encoding="utf-8")

    def _fake_runtime_kwargs():
        # Fresh dict on every call, like the lambda this replaces.
        return {
            "provider": "openrouter",
            "api_mode": "chat_completions",
            "base_url": "https://openrouter.ai/api/v1",
            "api_key": "***",
        }

    monkeypatch.setattr(gateway_run, "_hermes_home", home_dir)
    monkeypatch.setattr(gateway_run, "_env_path", home_dir / ".env")
    monkeypatch.setattr(gateway_run, "load_dotenv", lambda *_args, **_kwargs: None)
    monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", _fake_runtime_kwargs)

    # Swap in a stub `run_agent` module so the captured constructor kwargs can be inspected.
    stub_module = types.ModuleType("run_agent")
    stub_module.AIAgent = _CapturingAgent
    monkeypatch.setitem(sys.modules, "run_agent", stub_module)

    _CapturingAgent.last_init = None
    runner = _make_runner()
    session_key = "agent:main:local:dm"
    runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "high"}

    source = SessionSource(
        platform=Platform.LOCAL,
        chat_id="cli",
        chat_name="CLI",
        chat_type="dm",
        user_id="user-1",
    )

    outcome = asyncio.run(
        runner._run_agent(
            message="ping",
            context_prompt="",
            history=[],
            source=source,
            session_id="session-1",
            session_key=session_key,
        )
    )

    assert outcome["final_response"] == "ok"
    assert _CapturingAgent.last_init is not None
    assert _CapturingAgent.last_init["reasoning_config"] == {"enabled": True, "effort": "high"}
test_run_agent_prefers_session_override_over_global_runtime(monkeypatch): ) session_key = "agent:main:local:dm" runner._session_model_overrides[session_key] = _codex_override() + runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "high"} result = asyncio.run( runner._run_agent( @@ -121,6 +123,7 @@ def test_run_agent_prefers_session_override_over_global_runtime(monkeypatch): assert _CapturingAgent.last_init["api_mode"] == "codex_responses" assert _CapturingAgent.last_init["base_url"] == "https://chatgpt.com/backend-api/codex" assert _CapturingAgent.last_init["api_key"] == "***" + assert _CapturingAgent.last_init["reasoning_config"] == {"enabled": True, "effort": "high"} @pytest.mark.asyncio @@ -149,6 +152,7 @@ async def test_background_task_prefers_session_override_over_global_runtime(monk ) session_key = runner._session_key_for_source(source) runner._session_model_overrides[session_key] = _codex_override() + runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "high"} await runner._run_background_task("say hello", source, "bg_test") @@ -158,3 +162,4 @@ async def test_background_task_prefers_session_override_over_global_runtime(monk assert _CapturingAgent.last_init["api_mode"] == "codex_responses" assert _CapturingAgent.last_init["base_url"] == "https://chatgpt.com/backend-api/codex" assert _CapturingAgent.last_init["api_key"] == "***" + assert _CapturingAgent.last_init["reasoning_config"] == {"enabled": True, "effort": "high"} diff --git a/tests/gateway/test_session_model_reset.py b/tests/gateway/test_session_model_reset.py index 6529f3a11d..025487953d 100644 --- a/tests/gateway/test_session_model_reset.py +++ b/tests/gateway/test_session_model_reset.py @@ -1,4 +1,4 @@ -"""Tests that /new (and its /reset alias) clears the session-scoped model override.""" +"""Tests that /new (and its /reset alias) clears session-scoped overrides.""" from datetime import datetime from types import SimpleNamespace from 
unittest.mock import AsyncMock, MagicMock @@ -37,6 +37,7 @@ def _make_runner(): runner._voice_mode = {} runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False) runner._session_model_overrides = {} + runner._session_reasoning_overrides = {} runner._pending_model_notes = {} runner._background_tasks = set() @@ -75,14 +76,16 @@ async def test_new_command_clears_session_model_override(): runner._session_model_overrides[session_key] = { "model": "gpt-4o", "provider": "openai", - "api_key": "sk-test", + "api_key": "***", "base_url": "", "api_mode": "openai", } + runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "high"} await runner._handle_reset_command(_make_event("/new")) assert session_key not in runner._session_model_overrides + assert session_key not in runner._session_reasoning_overrides @pytest.mark.asyncio @@ -92,10 +95,12 @@ async def test_new_command_no_override_is_noop(): session_key = build_session_key(_make_source()) assert session_key not in runner._session_model_overrides + assert session_key not in runner._session_reasoning_overrides await runner._handle_reset_command(_make_event("/new")) assert session_key not in runner._session_model_overrides + assert session_key not in runner._session_reasoning_overrides @pytest.mark.asyncio @@ -115,12 +120,16 @@ async def test_new_command_only_clears_own_session(): runner._session_model_overrides[other_key] = { "model": "claude-sonnet-4-6", "provider": "anthropic", - "api_key": "sk-ant-test", + "api_key": "***", "base_url": "", "api_mode": "anthropic", } + runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "high"} + runner._session_reasoning_overrides[other_key] = {"enabled": True, "effort": "low"} await runner._handle_reset_command(_make_event("/new")) assert session_key not in runner._session_model_overrides assert other_key in runner._session_model_overrides + assert session_key not in runner._session_reasoning_overrides + assert other_key in 
runner._session_reasoning_overrides