fix(desktop): keep composer usable during reconnect (#45488)

* feat(cli): add --safe-mode troubleshooting flag

Inspired by Claude Code v2.1.169 (June 2026): run Hermes with all
customizations disabled to isolate setup problems from product bugs.

--safe-mode implies --ignore-user-config and --ignore-rules, and
additionally skips plugin discovery (hermes_cli/plugins.py) and MCP
server loading (tools/mcp_tool.py) via the internal HERMES_SAFE_MODE
env bridge.

* fix(desktop): keep composer usable during reconnect
This commit is contained in:
Teknium 2026-06-13 02:36:09 -07:00 committed by GitHub
parent b62e57b2f4
commit 8cf9d8689d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 246 additions and 26 deletions

View file

@ -24,6 +24,7 @@ afterEach(cleanup)
// state stays stale while the DOM already holds the text.
function Harness({
busy = false,
disabled = false,
queued = [],
onSubmit,
onQueue,
@ -31,6 +32,7 @@ function Harness({
onDrain
}: {
busy?: boolean
disabled?: boolean
queued?: readonly string[]
onSubmit: (text: string) => void
onQueue: (text: string) => void
@ -52,6 +54,10 @@ function Harness({
}
const submitDraft = () => {
if (disabled) {
return
}
const editor = editorRef.current
if (editor) {
const domText = composerPlainText(editor)
@ -84,6 +90,10 @@ function Harness({
const editorText = editorRef.current ? composerPlainText(editorRef.current) : draftRef.current
const hasLivePayload = editorText.trim().length > 0 || attachments.length > 0
if (disabled) {
return
}
if (!busy && !hasLivePayload && queued.length > 0) {
onDrain()
@ -186,4 +196,23 @@ describe('composer Enter submit — live DOM vs stale composer state (#39630)',
expect(onDrain).toHaveBeenCalledTimes(1)
expect(onSubmit).not.toHaveBeenCalled()
})
it('keeps reconnect drafts editable but blocks Enter submit until the gateway returns', async () => {
const onSubmit = vi.fn()
const onDrain = vi.fn()
const { getByTestId } = render(
<Harness disabled onCancel={vi.fn()} onDrain={onDrain} onQueue={vi.fn()} onSubmit={onSubmit} queued={['queued-1']} />
)
const editor = getByTestId('editor')
await act(async () => {
editor.textContent = 'draft while reconnecting'
fireEvent.input(editor)
fireEvent.keyDown(editor, { key: 'Enter' })
})
expect(editor.textContent).toBe('draft while reconnecting')
expect(onDrain).not.toHaveBeenCalled()
expect(onSubmit).not.toHaveBeenCalled()
})
})

View file

@ -247,6 +247,8 @@ export function ChatBar({
const gatewayState = useStore($gatewayState)
const newSessionPlaceholders = t.composer.newSessionPlaceholders
const followUpPlaceholders = t.composer.followUpPlaceholders
const reconnecting = gatewayState === 'closed' || gatewayState === 'error'
const inputDisabled = disabled && !reconnecting
// Resting placeholder: a starter for brand-new sessions, a continuation for
// existing ones. Picked once and only re-rolled when we genuinely move to a
@ -277,11 +279,13 @@ export function ChatBar({
setRestingPlaceholder(pickPlaceholder(sessionId ? followUpPlaceholders : newSessionPlaceholders))
}, [followUpPlaceholders, newSessionPlaceholders, sessionId])
// When the bar is disabled it's because the gateway isn't open. Distinguish a
// cold start ("Starting Hermes...") from a dropped connection we're trying to
// restore (e.g. after the Mac slept) so the stuck state reads as recoverable.
// When the transport is disabled it's because the gateway isn't open.
// Distinguish a cold start ("Starting Hermes...") from a dropped connection
// we're trying to restore. During reconnect, keep the textbox editable so a
// flaky network doesn't block drafting; only submit/backend actions stay
// disabled until the gateway is open again.
const placeholder = disabled
? gatewayState === 'closed' || gatewayState === 'error'
? reconnecting
? t.composer.placeholderReconnecting
: t.composer.placeholderStarting
: restingPlaceholder
@ -323,13 +327,13 @@ export function ChatBar({
)
useEffect(() => {
if (!disabled) {
if (!inputDisabled) {
focusInput()
}
}, [disabled, focusInput, focusKey, focusRequestId])
}, [focusInput, focusKey, focusRequestId, inputDisabled])
useEffect(() => {
if (disabled) {
if (inputDisabled) {
return undefined
}
@ -349,7 +353,7 @@ export function ChatBar({
offFocus()
offInsert()
}
}, [appendExternalText, disabled])
}, [appendExternalText, inputDisabled])
// Keep draftRef in sync with the assistant-ui composer state for callers
// that read the latest text outside the React render cycle. We don't push
@ -934,6 +938,10 @@ export function ChatBar({
const editorText = editorRef.current ? composerPlainText(editorRef.current) : draftRef.current
const hasLivePayload = editorText.trim().length > 0 || attachments.length > 0
if (disabled) {
return
}
if (!busy && !hasLivePayload && queuedPrompts.length > 0) {
void drainNextQueued()
@ -1476,6 +1484,10 @@ export function ChatBar({
}
const submitDraft = () => {
if (disabled) {
return
}
// Source the text from the DOM editor, not React state. The AUI composer
// state (`draft`) and the derived `hasComposerPayload` lag the DOM by a
// render, so on fast typing or IME composition the final keystroke(s) may
@ -1656,6 +1668,7 @@ export function ChatBar({
const input = (
<div className={cn('relative', stacked ? 'w-full' : 'min-w-(--composer-input-inline-min-width) flex-1')}>
<div
aria-disabled={inputDisabled ? true : undefined}
aria-label={t.composer.message}
autoCapitalize="off"
autoCorrect="off"
@ -1666,7 +1679,7 @@ export function ChatBar({
stacked && 'pl-3',
stacked ? 'w-full' : 'min-w-(--composer-input-inline-min-width) flex-1'
)}
contentEditable={!disabled}
contentEditable={!inputDisabled}
data-placeholder={placeholder}
data-slot={RICH_INPUT_SLOT}
onBlur={() => window.setTimeout(closeTrigger, 80)}

View file

@ -3,23 +3,23 @@ import { afterEach, beforeEach, describe, expect, it } from 'vitest'
import { $desktopBoot } from '@/store/boot'
import { $desktopOnboarding } from '@/store/onboarding'
import { $gatewayState, setGatewayState } from '@/store/session'
import { setGatewayState } from '@/store/session'
import { BootFailureOverlay } from './boot-failure-overlay'
import { GatewayConnectingOverlay } from './gateway-connecting-overlay'
// Repro for the "remote gateway → stuck on CONNECTING, no way to settings"
// report. The connecting overlay (z-1200, full-screen, pointer-events on) is
// shown whenever `gatewayState !== 'open' && !boot.error`. The ONLY escape
// report. The connecting overlay (z-1200, full-screen, pointer-events on) used
// to be shown whenever `gatewayState !== 'open' && !boot.error`. The ONLY escape
// hatch — BootFailureOverlay, which has "Use local gateway" / "Sign in" /
// "Retry" — only renders when `boot.error` is set.
//
// useGatewayBoot only calls failDesktopBoot() (which sets boot.error) when the
// INITIAL boot() throws. After the first successful connect (bootCompleted),
// any later socket drop goes through scheduleReconnect(), which loops FOREVER
// against the dead remote and never sets boot.error. So gatewayState sits at
// 'closed'/'error' with boot.error null → CONNECTING forever, recovery overlay
// never appears, settings unreachable.
// against the dead remote. So gatewayState sits at 'closed'/'error' with
// boot.error null. The fix keeps the initial-boot overlay out of post-boot
// reconnects, leaving chat/settings usable while the reconnect loop runs.
function resetStores() {
setGatewayState('idle')
@ -75,7 +75,7 @@ describe('connecting overlay vs recovery surface', () => {
expect(isConnectingShown()).toBe(false)
})
it('REPRO: remote socket drops AFTER a successful boot → stuck on CONNECTING, no recovery, no settings', () => {
it('post-boot socket drops do not re-cover the app with the initial CONNECTING overlay', () => {
// 1. Initial boot succeeded: gateway opened, boot completed (no error).
setGatewayState('open')
const { rerender } = render(
@ -97,14 +97,14 @@ describe('connecting overlay vs recovery surface', () => {
</>
)
// The connecting overlay reappears and latches...
expect(isConnectingShown()).toBe(true)
// ...with NO recovery surface, because boot.error was never set.
// The initial-boot connecting overlay stays out of the way, so settings and
// the composer remain reachable during the reconnect loop.
expect(isConnectingShown()).toBe(false)
expect(isRecoveryShown()).toBe(false)
// 3. Reconnect loops forever against the dead remote: gatewayState bounces
// closed → error → closed, boot.error never gets set. The user is
// pinned on CONNECTING with no path to Settings indefinitely.
// 3. Reconnect loops against the dead remote: gatewayState bounces closed
// → error → closed. Until the escalation path sets boot.error, the app
// remains usable instead of modal-blocked.
setGatewayState('error')
rerender(
<>
@ -113,7 +113,7 @@ describe('connecting overlay vs recovery surface', () => {
</>
)
expect($desktopBoot.get().error).toBeNull()
expect(isConnectingShown()).toBe(true)
expect(isConnectingShown()).toBe(false)
expect(isRecoveryShown()).toBe(false)
})

View file

@ -52,7 +52,13 @@ export function GatewayConnectingOverlay() {
const [tail, setTail] = useState(TAIL)
const [phase, setPhase] = useState<Phase>('live')
const connecting = gatewayState !== 'open' && !boot.error
// The full-screen connecting overlay is for initial boot only. After a
// healthy boot, flaky networks / sleep-wake can drop the socket and flip the
// gateway state back to closed/error while the app reconnects. Do not cover
// the chat then — users should still be able to type drafts, open settings,
// and recover instead of staring at a modal CONNECTING screen.
const initialBootActive = boot.visible || boot.running || boot.progress < 100
const connecting = gatewayState !== 'open' && !boot.error && initialBootActive
// Latches once we've actually shown the overlay, so the brief frame where
// gatewayState flips to "open" (connecting -> false) before the exit phase
// kicks in doesn't unmount us and cause a flash.

View file

@ -213,6 +213,13 @@ def build_top_level_parser():
default=False,
help="Skip auto-injection of AGENTS.md, SOUL.md, .cursorrules, memory, and preloaded skills",
)
_inherited_flag(
parser,
"--safe-mode",
action="store_true",
default=False,
help="Troubleshooting mode: disable ALL customizations — user config, AGENTS.md/memory injection, plugins, and MCP servers (implies --ignore-user-config and --ignore-rules)",
)
_inherited_flag(
parser,
"--tui",
@ -366,6 +373,13 @@ def build_top_level_parser():
default=argparse.SUPPRESS,
help="Skip auto-injection of AGENTS.md, SOUL.md, .cursorrules, memory, and preloaded skills. Combine with --ignore-user-config for a fully isolated run.",
)
_inherited_flag(
chat_parser,
"--safe-mode",
action="store_true",
default=argparse.SUPPRESS,
help="Troubleshooting mode: disable ALL customizations — user config, AGENTS.md/memory injection, plugins, and MCP servers (implies --ignore-user-config and --ignore-rules). Use to isolate whether a problem comes from your setup or from Hermes itself.",
)
chat_parser.add_argument(
"--source",
default=None,

View file

@ -2199,6 +2199,18 @@ def cmd_chat(args):
if getattr(args, "yolo", False):
os.environ["HERMES_YOLO_MODE"] = "1"
# --safe-mode: troubleshooting mode that disables ALL customizations.
# Inspired by Claude Code v2.1.169's --safe-mode (June 2026): run with a
# pristine environment to isolate whether a problem comes from the user's
# setup (config, rules files, plugins, MCP servers) or from Hermes itself.
# Implemented as a superset of --ignore-user-config + --ignore-rules plus
# plugin/MCP discovery suppression (HERMES_SAFE_MODE is checked by
# hermes_cli/plugins.py and tools/mcp_tool.py).
if getattr(args, "safe_mode", False):
os.environ["HERMES_SAFE_MODE"] = "1"
os.environ["HERMES_IGNORE_USER_CONFIG"] = "1"
os.environ["HERMES_IGNORE_RULES"] = "1"
# --ignore-user-config: make load_cli_config() / load_config() skip the
# user's ~/.hermes/config.yaml and return built-in defaults. Set BEFORE
# importing cli (which runs `CLI_CONFIG = load_cli_config()` at module
@ -2256,8 +2268,8 @@ def cmd_chat(args):
"checkpoints": getattr(args, "checkpoints", False),
"pass_session_id": getattr(args, "pass_session_id", False),
"max_turns": getattr(args, "max_turns", None),
"ignore_rules": getattr(args, "ignore_rules", False),
"ignore_user_config": getattr(args, "ignore_user_config", False),
"ignore_rules": getattr(args, "ignore_rules", False) or getattr(args, "safe_mode", False),
"ignore_user_config": getattr(args, "ignore_user_config", False) or getattr(args, "safe_mode", False),
"compact": getattr(args, "compact", False),
}
# Filter out None values

View file

@ -1124,6 +1124,14 @@ class PluginManager:
"""
if self._discovered and not force:
return
# Safe mode (--safe-mode / HERMES_SAFE_MODE=1): troubleshooting run
# with all customizations disabled. Skip plugin discovery entirely so
# no third-party code (hooks, tools, platforms) loads. Mark as
# discovered so callers see a clean empty registry, not a retry loop.
if env_var_enabled("HERMES_SAFE_MODE"):
logger.info("HERMES_SAFE_MODE=1 — plugin discovery skipped")
self._discovered = True
return
if force:
self._plugins.clear()
self._hooks.clear()

View file

@ -0,0 +1,130 @@
"""Tests for `hermes chat --safe-mode` — pristine troubleshooting runs.
Inspired by Claude Code v2.1.169's ``--safe-mode`` flag (June 2026), which
disables all customizations (CLAUDE.md, plugins, skills, hooks, MCP) for
troubleshooting. The Hermes equivalent:
* implies ``--ignore-user-config`` (built-in config defaults)
* implies ``--ignore-rules`` (no AGENTS.md/memory/preloaded-skill injection)
* skips plugin discovery entirely (``hermes_cli.plugins``)
* loads zero MCP servers (``tools.mcp_tool._load_mcp_config``)
"""
from __future__ import annotations
import os
import pytest
# Environment gates toggled by --safe-mode; every test starts with all of them unset.
_VARS = ("HERMES_SAFE_MODE", "HERMES_IGNORE_USER_CONFIG", "HERMES_IGNORE_RULES")


@pytest.fixture(autouse=True)
def _clean_env(monkeypatch):
    """Isolate every test from ambient and leftover safe-mode env gates.

    Before the test, each gate is removed through ``monkeypatch`` so any value
    inherited from the outer environment is hidden. After the test, each gate
    is popped from ``os.environ`` directly, which also discards values that
    were written without going through ``monkeypatch``.
    """
    for gate in _VARS:
        monkeypatch.delenv(gate, raising=False)

    yield

    for gate in _VARS:
        os.environ.pop(gate, None)
class TestSafeModeEnvWiring:
    """Pin the three env gates that ``--safe-mode`` is expected to switch on."""

    def test_safe_mode_sets_all_gates(self):
        # Replays the safe-mode branch of cmd_chat (hermes_cli/main.py) inline;
        # cmd_chat itself is not invoked here.
        class Args:
            safe_mode = True

        parsed = Args()
        gates = ("HERMES_SAFE_MODE", "HERMES_IGNORE_USER_CONFIG", "HERMES_IGNORE_RULES")

        if getattr(parsed, "safe_mode", False):
            for gate in gates:
                os.environ[gate] = "1"

        for gate in gates:
            assert os.environ.get(gate) == "1"
class TestSafeModePluginDiscovery:
    """``discover_and_load`` must skip its inner sweep only under HERMES_SAFE_MODE=1."""

    @staticmethod
    def _manager_with_spy(monkeypatch):
        """Return a fresh ``PluginManager`` and the list its stubbed inner sweep appends to.

        The import stays local so it runs after the caller has arranged the
        environment, matching the order used by each test.
        """
        from hermes_cli.plugins import PluginManager

        manager = PluginManager()
        sweeps = []
        monkeypatch.setattr(
            manager, "_discover_and_load_inner", lambda: sweeps.append(True)
        )
        return manager, sweeps

    def test_discovery_skipped(self, monkeypatch):
        monkeypatch.setenv("HERMES_SAFE_MODE", "1")
        manager, sweeps = self._manager_with_spy(monkeypatch)

        manager.discover_and_load()

        assert sweeps == []  # inner sweep never ran
        assert manager._discovered is True  # registry settled as clean-empty
        assert manager._plugins == {}

    def test_discovery_runs_without_safe_mode(self, monkeypatch):
        monkeypatch.delenv("HERMES_SAFE_MODE", raising=False)
        manager, sweeps = self._manager_with_spy(monkeypatch)

        manager.discover_and_load()

        assert sweeps == [True]
class TestSafeModeMCP:
    """``_load_mcp_config`` must return no servers under HERMES_SAFE_MODE=1."""

    @staticmethod
    def _fake_load_config():
        """Stand-in for ``hermes_cli.config.load_config`` with one MCP server configured.

        A new dict is built on every call so nothing is shared between tests.
        """
        return {"mcp_servers": {"github": {"url": "https://example.com/mcp"}}}

    def test_mcp_servers_empty(self, monkeypatch):
        monkeypatch.setenv("HERMES_SAFE_MODE", "1")
        from tools.mcp_tool import _load_mcp_config

        # Patch through the injected fixture instead of opening a second
        # MonkeyPatch context; the fixture undoes the patch at teardown.
        monkeypatch.setattr("hermes_cli.config.load_config", self._fake_load_config)

        # A server is configured, yet safe mode must report none.
        assert _load_mcp_config() == {}

    def test_mcp_servers_load_without_safe_mode(self, monkeypatch):
        monkeypatch.delenv("HERMES_SAFE_MODE", raising=False)
        from tools.mcp_tool import _load_mcp_config

        monkeypatch.setattr("hermes_cli.config.load_config", self._fake_load_config)

        servers = _load_mcp_config()
        assert "github" in servers
class TestSafeModeParser:
    """``--safe-mode`` must be accepted by the root parser and by ``hermes chat``."""

    @staticmethod
    def _parse(argv):
        """Build the top-level parser and return the namespace parsed from ``argv``."""
        from hermes_cli._parser import build_top_level_parser

        parser, _subparsers, _chat = build_top_level_parser()
        return parser.parse_args(argv)

    def test_chat_subcommand_accepts_flag(self):
        namespace = self._parse(["chat", "--safe-mode"])
        assert getattr(namespace, "safe_mode", False) is True

    def test_root_parser_accepts_flag(self):
        namespace = self._parse(["--safe-mode"])
        assert getattr(namespace, "safe_mode", False) is True

    def test_default_is_off(self):
        namespace = self._parse(["chat"])
        assert getattr(namespace, "safe_mode", False) is False

View file

@ -2686,6 +2686,11 @@ def _load_mcp_config() -> Dict[str, dict]:
"""
try:
from hermes_cli.config import load_config
# Safe mode (--safe-mode / HERMES_SAFE_MODE=1): troubleshooting run
# with all customizations disabled — no MCP servers connect.
from utils import env_var_enabled as _env_enabled
if _env_enabled("HERMES_SAFE_MODE"):
return {}
config = load_config()
servers = config.get("mcp_servers")
if not servers or not isinstance(servers, dict):

View file

@ -112,6 +112,7 @@ Common options:
| `--pass-session-id` | Pass the session ID into the system prompt. |
| `--ignore-user-config` | Ignore `~/.hermes/config.yaml` and use built-in defaults. Credentials in `.env` are still loaded. Useful for isolated CI runs, reproducible bug reports, and third-party integrations. |
| `--ignore-rules` | Skip auto-injection of `AGENTS.md`, `SOUL.md`, `.cursorrules`, persistent memory, and preloaded skills. Combine with `--ignore-user-config` for a fully isolated run. |
| `--safe-mode` | Troubleshooting mode: disable ALL customizations — user config, rules/memory injection, plugins, and MCP servers (implies `--ignore-user-config` and `--ignore-rules`). Use to isolate whether a problem comes from your setup or from Hermes itself. |
| `--source <tag>` | Session source tag for filtering (default: `cli`). Use `tool` for third-party integrations that should not appear in user session lists. |
| `--max-turns <N>` | Maximum tool-calling iterations per conversation turn (default: 90, or `agent.max_turns` in config). |
@ -125,6 +126,7 @@ hermes chat --toolsets web,terminal,skills
hermes chat --quiet -q "Return only JSON"
hermes chat --worktree -q "Review this repo and open a PR"
hermes chat --ignore-user-config --ignore-rules -q "Repro without my personal setup"
hermes chat --safe-mode -q "Is this bug mine or Hermes'?"
```
### `hermes -z <prompt>` — scripted one-shot

View file

@ -595,6 +595,7 @@ Advanced per-platform knobs for throttling the outbound message batcher. Most us
| `HERMES_ACCEPT_HOOKS` | Auto-approve any unseen shell hooks declared in `config.yaml` without a TTY prompt. Equivalent to `--accept-hooks` or `hooks_auto_accept: true`. |
| `HERMES_IGNORE_USER_CONFIG` | Skip `~/.hermes/config.yaml` and use built-in defaults (credentials in `.env` still load). Equivalent to `--ignore-user-config`. |
| `HERMES_IGNORE_RULES` | Skip auto-injection of `AGENTS.md`, `SOUL.md`, `.cursorrules`, memory, and preloaded skills. Equivalent to `--ignore-rules`. |
| `HERMES_SAFE_MODE` | Troubleshooting gate set automatically by `--safe-mode`: skips plugin discovery and MCP server loading. `--safe-mode` also sets `HERMES_IGNORE_USER_CONFIG` and `HERMES_IGNORE_RULES`, so that together all customizations are disabled. |
| `HERMES_MD_NAMES` | Comma-separated list of rules-file names to auto-inject (default: `AGENTS.md,CLAUDE.md,.cursorrules,SOUL.md`). |
| `HERMES_TOOL_PROGRESS` | Deprecated compatibility variable for tool progress display. Prefer `display.tool_progress` in `config.yaml`. |
| `HERMES_TOOL_PROGRESS_MODE` | Deprecated compatibility variable for tool progress mode. Prefer `display.tool_progress` in `config.yaml`. |