Merge pull request #14890 from NousResearch/bb/tui-web-chat-unified

feat(web): dashboard Chat tab — xterm.js + JSON-RPC sidecar (supersedes #12710 + #13379)
2026-04-25 00:51:20 +00:00 · 2026-04-24 10:35:43 -07:00 · 2026-04-24 10:35:43 -07:00 · c61547c067
commit c61547c067
parent 7f0f67d5f7 850fac14e3
50 changed files with 4937 additions and 640 deletions
--- a/AGENTS.md
+++ b/AGENTS.md
@ -240,6 +240,19 @@ npm run fmt       # prettier
 npm test          # vitest
 ```

+### TUI in the Dashboard (`hermes dashboard` → `/chat`)
+
+The dashboard embeds the real `hermes --tui` — **not** a rewrite.  See `hermes_cli/pty_bridge.py` + the `@app.websocket("/api/pty")` endpoint in `hermes_cli/web_server.py`.
+
+- Browser loads `web/src/pages/ChatPage.tsx`, which mounts xterm.js's `Terminal` with the WebGL renderer, `@xterm/addon-fit` for container-driven resize, and `@xterm/addon-unicode11` for modern wide-character widths.
+- `/api/pty?token=…` upgrades to a WebSocket; auth uses the same ephemeral `_SESSION_TOKEN` as REST, via query param (browsers can't set `Authorization` on WS upgrade).
+- The server spawns whatever `hermes --tui` would spawn, through `ptyprocess` (POSIX PTY — WSL works, native Windows does not).
+- Frames: raw PTY bytes in each direction; resize via `\x1b[RESIZE:<cols>;<rows>]` intercepted on the server and applied with `TIOCSWINSZ`.
+
+**Do not re-implement the primary chat experience in React.** The main transcript, composer/input flow (including slash-command behavior), and PTY-backed terminal belong to the embedded `hermes --tui` — anything new you add to Ink shows up in the dashboard automatically. If you find yourself rebuilding the transcript or composer for the dashboard, stop and extend Ink instead.
+
+**Structured React UI around the TUI is allowed when it is not a second chat surface.** Sidebar widgets, inspectors, summaries, status panels, and similar supporting views (e.g. `ChatSidebar`, `ModelPickerDialog`, `ToolCall`) are fine when they complement the embedded TUI rather than replacing the transcript / composer / terminal. Keep their state independent of the PTY child's session and surface their failures non-destructively so the terminal pane keeps working unimpaired.
+
 ---

 ## Adding New Tools
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@ -6715,9 +6715,15 @@ def cmd_dashboard(args):
    try:
        import fastapi  # noqa: F401
        import uvicorn  # noqa: F401
-    except ImportError:
-        print("Web UI dependencies not installed.")
-        print(f"Install them with:  {sys.executable} -m pip install 'fastapi' 'uvicorn[standard]'")
+    except ImportError as e:
+        print("Web UI dependencies not installed (need fastapi + uvicorn).")
+        print(
+            f"Re-install the package into this interpreter so metadata updates apply:\n"
+            f"  cd {PROJECT_ROOT}\n"
+            f"  {sys.executable} -m pip install -e .\n"
+            "If `pip` is missing in this venv, use:  uv pip install -e ."
+        )
+        print(f"Import error: {e}")
        sys.exit(1)

    if "HERMES_WEB_DIST" not in os.environ:
@ -6726,11 +6732,13 @@ def cmd_dashboard(args):

    from hermes_cli.web_server import start_server

+    embedded_chat = args.tui or os.environ.get("HERMES_DASHBOARD_TUI") == "1"
    start_server(
        host=args.host,
        port=args.port,
        open_browser=not args.no_open,
        allow_public=getattr(args, "insecure", False),
+        embedded_chat=embedded_chat,
    )


@ -8916,6 +8924,14 @@ Examples:
        action="store_true",
        help="Allow binding to non-localhost (DANGEROUS: exposes API keys on the network)",
    )
+    dashboard_parser.add_argument(
+        "--tui",
+        action="store_true",
+        help=(
+            "Expose the in-browser Chat tab (embedded `hermes --tui` via PTY/WebSocket). "
+            "Alternatively set HERMES_DASHBOARD_TUI=1."
+        ),
+    )
    dashboard_parser.set_defaults(func=cmd_dashboard)

    # =========================================================================
--- a/hermes_cli/pty_bridge.py
+++ b/hermes_cli/pty_bridge.py
@ -0,0 +1,229 @@
+"""PTY bridge for `hermes dashboard` chat tab.
+
+Wraps a child process behind a pseudo-terminal so its ANSI output can be
+streamed to a browser-side terminal emulator (xterm.js) and typed
+keystrokes can be fed back in.  The only caller today is the
+``/api/pty`` WebSocket endpoint in ``hermes_cli.web_server``.
+
+Design constraints:
+
+* **POSIX-only.**  Hermes Agent supports Windows exclusively via WSL, which
+  exposes a native POSIX PTY via ``openpty(3)``.  Native Windows Python
+  has no PTY; :class:`PtyUnavailableError` is raised with a user-readable
+  install/platform message so the dashboard can render a banner instead of
+  crashing.
+* **Zero Node dependency on the server side.**  We use :mod:`ptyprocess`,
+  which is a pure-Python wrapper around the OS calls.  The browser talks
+  to the same ``hermes --tui`` binary it would launch from the CLI, so
+  every TUI feature (slash popover, model picker, tool rows, markdown,
+  skin engine, clarify/sudo/approval prompts) ships automatically.
+* **Byte-safe I/O.**  Reads and writes go through the PTY master fd
+  directly — we avoid :class:`ptyprocess.PtyProcessUnicode` because
+  streaming ANSI is inherently byte-oriented and UTF-8 boundaries may land
+  mid-read.
+"""
+
+from __future__ import annotations
+
+import errno
+import fcntl
+import os
+import select
+import signal
+import struct
+import sys
+import termios
+import time
+from typing import Optional, Sequence
+
+try:
+    import ptyprocess  # type: ignore
+    _PTY_AVAILABLE = not sys.platform.startswith("win")
+except ImportError:  # pragma: no cover - dev env without ptyprocess
+    ptyprocess = None  # type: ignore
+    _PTY_AVAILABLE = False
+
+
+__all__ = ["PtyBridge", "PtyUnavailableError"]
+
+
+class PtyUnavailableError(RuntimeError):
+    """Raised when a PTY cannot be created on this platform.
+
+    Today this means native Windows (no ConPTY bindings) or a dev
+    environment missing the ``ptyprocess`` dependency.  The dashboard
+    surfaces the message to the user as a chat-tab banner.
+    """
+
+
+class PtyBridge:
+    """Thin wrapper around ``ptyprocess.PtyProcess`` for byte streaming.
+
+    Not thread-safe.  A single bridge is owned by the WebSocket handler
+    that spawned it; the reader runs in an executor thread while writes
+    happen on the event-loop thread.  Both sides are OK because the
+    kernel PTY is the actual synchronization point — we never call
+    :mod:`ptyprocess` methods concurrently, we only call ``os.read`` and
+    ``os.write`` on the master fd, which is safe.
+    """
+
+    def __init__(self, proc: "ptyprocess.PtyProcess"):  # type: ignore[name-defined]
+        self._proc = proc
+        self._fd: int = proc.fd
+        self._closed = False
+
+    # -- lifecycle --------------------------------------------------------
+
+    @classmethod
+    def is_available(cls) -> bool:
+        """True if a PTY can be spawned on this platform."""
+        return bool(_PTY_AVAILABLE)
+
+    @classmethod
+    def spawn(
+        cls,
+        argv: Sequence[str],
+        *,
+        cwd: Optional[str] = None,
+        env: Optional[dict] = None,
+        cols: int = 80,
+        rows: int = 24,
+    ) -> "PtyBridge":
+        """Spawn ``argv`` behind a new PTY and return a bridge.
+
+        Raises :class:`PtyUnavailableError` if the platform can't host a
+        PTY.  Raises :class:`FileNotFoundError` or :class:`OSError` for
+        ordinary exec failures (missing binary, bad cwd, etc.).
+        """
+        if not _PTY_AVAILABLE:
+            if sys.platform.startswith("win"):
+                raise PtyUnavailableError(
+                    "Pseudo-terminals are unavailable on this platform. "
+                    "Hermes Agent supports Windows only via WSL."
+                )
+            if ptyprocess is None:
+                raise PtyUnavailableError(
+                    "The `ptyprocess` package is missing. "
+                    "Install with: pip install ptyprocess "
+                    "(or pip install -e '.[pty]')."
+                )
+            raise PtyUnavailableError("Pseudo-terminals are unavailable.")
+        # Let caller-supplied env fully override inheritance; if they pass
+        # None we inherit the server's env (same semantics as subprocess).
+        spawn_env = os.environ.copy() if env is None else env
+        proc = ptyprocess.PtyProcess.spawn(  # type: ignore[union-attr]
+            list(argv),
+            cwd=cwd,
+            env=spawn_env,
+            dimensions=(rows, cols),
+        )
+        return cls(proc)
+
+    @property
+    def pid(self) -> int:
+        return int(self._proc.pid)
+
+    def is_alive(self) -> bool:
+        if self._closed:
+            return False
+        try:
+            return bool(self._proc.isalive())
+        except Exception:
+            return False
+
+    # -- I/O --------------------------------------------------------------
+
+    def read(self, timeout: float = 0.2) -> Optional[bytes]:
+        """Read up to 64 KiB of raw bytes from the PTY master.
+
+        Returns:
+            * bytes — one or more bytes of child output
+            * empty bytes (``b""``) — no data available within ``timeout``
+            * None — child has exited and the master fd is at EOF
+
+        Never blocks longer than ``timeout`` seconds.  Safe to call after
+        :meth:`close`; returns ``None`` in that case.
+        """
+        if self._closed:
+            return None
+        try:
+            readable, _, _ = select.select([self._fd], [], [], timeout)
+        except (OSError, ValueError):
+            return None
+        if not readable:
+            return b""
+        try:
+            data = os.read(self._fd, 65536)
+        except OSError as exc:
+            # EIO on Linux = slave side closed.  EBADF = already closed.
+            if exc.errno in (errno.EIO, errno.EBADF):
+                return None
+            raise
+        if not data:
+            return None
+        return data
+
+    def write(self, data: bytes) -> None:
+        """Write raw bytes to the PTY master (i.e. the child's stdin)."""
+        if self._closed or not data:
+            return
+        # os.write can return a short write under load; loop until drained.
+        view = memoryview(data)
+        while view:
+            try:
+                n = os.write(self._fd, view)
+            except OSError as exc:
+                if exc.errno in (errno.EIO, errno.EBADF, errno.EPIPE):
+                    return
+                raise
+            if n <= 0:
+                return
+            view = view[n:]
+
+    def resize(self, cols: int, rows: int) -> None:
+        """Forward a terminal resize to the child via ``TIOCSWINSZ``."""
+        if self._closed:
+            return
+        # struct winsize: rows, cols, xpixel, ypixel (all unsigned short)
+        winsize = struct.pack("HHHH", max(1, rows), max(1, cols), 0, 0)
+        try:
+            fcntl.ioctl(self._fd, termios.TIOCSWINSZ, winsize)
+        except OSError:
+            pass
+
+    # -- teardown ---------------------------------------------------------
+
+    def close(self) -> None:
+        """Terminate the child (SIGHUP → SIGTERM → SIGKILL, 0.5s grace each) and close fds.
+
+        Idempotent.  Reaping the child is important so we don't leak
+        zombies across the lifetime of the dashboard process.
+        """
+        if self._closed:
+            return
+        self._closed = True
+
+        # SIGHUP is the conventional "your terminal went away" signal.
+        # We escalate if the child ignores it.
+        for sig in (signal.SIGHUP, signal.SIGTERM, signal.SIGKILL):
+            if not self._proc.isalive():
+                break
+            try:
+                self._proc.kill(sig)
+            except Exception:
+                pass
+            deadline = time.monotonic() + 0.5
+            while self._proc.isalive() and time.monotonic() < deadline:
+                time.sleep(0.02)
+
+        try:
+            self._proc.close(force=True)
+        except Exception:
+            pass
+
+    # Context-manager sugar — handy in tests and ad-hoc scripts.
+    def __enter__(self) -> "PtyBridge":
+        return self
+
+    def __exit__(self, *_exc) -> None:
+        self.close()
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@ -49,7 +49,7 @@ from hermes_cli.config import (
 from gateway.status import get_running_pid, read_runtime_status

 try:
-    from fastapi import FastAPI, HTTPException, Request
+    from fastapi import FastAPI, HTTPException, Request, WebSocket, WebSocketDisconnect
    from fastapi.middleware.cors import CORSMiddleware
    from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
    from fastapi.staticfiles import StaticFiles
@ -73,6 +73,10 @@ app = FastAPI(title="Hermes Agent", version=__version__)
 _SESSION_TOKEN = secrets.token_urlsafe(32)
 _SESSION_HEADER_NAME = "X-Hermes-Session-Token"

+# In-browser Chat tab (/chat, /api/pty, …).  Off unless ``hermes dashboard --tui``
+# or HERMES_DASHBOARD_TUI=1.  Set from :func:`start_server`.
+_DASHBOARD_EMBEDDED_CHAT_ENABLED = False
+
 # Simple rate limiter for the reveal endpoint
 _reveal_timestamps: List[float] = []
 _REVEAL_MAX_PER_WINDOW = 5
@ -2263,6 +2267,329 @@ async def get_usage_analytics(days: int = 30):
        db.close()


+# ---------------------------------------------------------------------------
+# /api/pty — PTY-over-WebSocket bridge for the dashboard "Chat" tab.
+#
+# The endpoint spawns the same ``hermes --tui`` binary the CLI uses, behind
+# a POSIX pseudo-terminal, and forwards bytes + resize escapes across a
+# WebSocket.  The browser renders the ANSI through xterm.js (see
+# web/src/pages/ChatPage.tsx).
+#
+# Auth: ``?token=<session_token>`` query param (browsers can't set
+# Authorization on the WS upgrade).  Same ephemeral ``_SESSION_TOKEN`` as
+# REST.  Localhost-only — we defensively reject non-loopback clients even
+# though uvicorn binds to loopback unless ``allow_public`` is set.
+# ---------------------------------------------------------------------------
+
+import re
+import asyncio
+
+from hermes_cli.pty_bridge import PtyBridge, PtyUnavailableError
+
+_RESIZE_RE = re.compile(rb"\x1b\[RESIZE:(\d+);(\d+)\]")
+_PTY_READ_CHUNK_TIMEOUT = 0.2
+_VALID_CHANNEL_RE = re.compile(r"^[A-Za-z0-9._-]{1,128}$")
+# Starlette's TestClient reports the peer as "testclient"; treat it as
+# loopback so tests don't need to rewrite request scope.
+_LOOPBACK_HOSTS = frozenset({"127.0.0.1", "::1", "localhost", "testclient"})
+
+# Per-channel subscriber registry used by /api/pub (PTY-side gateway → dashboard)
+# and /api/events (dashboard → browser sidebar).  Keyed by an opaque channel id
+# the chat tab generates on mount; an entry is evicted when its last
+# subscriber disconnects (publishers are not tracked in this registry).
+_event_channels: dict[str, set] = {}
+_event_lock = asyncio.Lock()
+
+
+def _resolve_chat_argv(
+    resume: Optional[str] = None,
+    sidecar_url: Optional[str] = None,
+) -> tuple[list[str], Optional[str], Optional[dict]]:
+    """Resolve the argv + cwd + env for the chat PTY.
+
+    Default: whatever ``hermes --tui`` would run.  Tests monkeypatch this
+    function to inject a tiny fake command (``cat``, ``sh -c 'printf …'``)
+    so nothing has to build Node or the TUI bundle.
+
+    Session resume is propagated via the ``HERMES_TUI_RESUME`` env var —
+    matching what ``hermes_cli.main._launch_tui`` does for the CLI path.
+    Appending ``--resume <id>`` to argv doesn't work because ``ui-tui`` does
+    not parse its argv.
+
+    `sidecar_url` (when set) is forwarded as ``HERMES_TUI_SIDECAR_URL`` so
+    the spawned ``tui_gateway.entry`` can mirror dispatcher emits to the
+    dashboard's ``/api/pub`` endpoint (see :func:`pub_ws`).
+    """
+    from hermes_cli.main import PROJECT_ROOT, _make_tui_argv
+
+    argv, cwd = _make_tui_argv(PROJECT_ROOT / "ui-tui", tui_dev=False)
+    env: Optional[dict] = None
+
+    if resume or sidecar_url:
+        env = os.environ.copy()
+
+        if resume:
+            env["HERMES_TUI_RESUME"] = resume
+
+        if sidecar_url:
+            env["HERMES_TUI_SIDECAR_URL"] = sidecar_url
+
+    return list(argv), str(cwd) if cwd else None, env
+
+
+def _build_sidecar_url(channel: str) -> Optional[str]:
+    """ws:// URL the PTY child should publish events to, or None when unbound."""
+    host = getattr(app.state, "bound_host", None)
+    port = getattr(app.state, "bound_port", None)
+
+    if not host or not port:
+        return None
+
+    netloc = f"[{host}]:{port}" if ":" in host and not host.startswith("[") else f"{host}:{port}"
+    qs = urllib.parse.urlencode({"token": _SESSION_TOKEN, "channel": channel})
+
+    return f"ws://{netloc}/api/pub?{qs}"
+
+
+async def _broadcast_event(channel: str, payload: str) -> None:
+    """Fan out one publisher frame to every subscriber on `channel`."""
+    async with _event_lock:
+        subs = list(_event_channels.get(channel, ()))
+
+    for sub in subs:
+        try:
+            await sub.send_text(payload)
+        except Exception:
+            # Subscriber went away mid-send; the /api/events handler's finally
+            # clause removes it from the registry when that handler exits.
+            pass
+
+
+def _channel_or_close_code(ws: WebSocket) -> Optional[str]:
+    """Return the channel id from the query string or None if invalid."""
+    channel = ws.query_params.get("channel", "")
+
+    return channel if _VALID_CHANNEL_RE.match(channel) else None
+
+
+@app.websocket("/api/pty")
+async def pty_ws(ws: WebSocket) -> None:
+    if not _DASHBOARD_EMBEDDED_CHAT_ENABLED:
+        await ws.close(code=4403)
+        return
+
+    # --- auth + loopback check (before accept so we can close cleanly) ---
+    token = ws.query_params.get("token", "")
+    expected = _SESSION_TOKEN
+    if not hmac.compare_digest(token.encode(), expected.encode()):
+        await ws.close(code=4401)
+        return
+
+    client_host = ws.client.host if ws.client else ""
+    if client_host and client_host not in _LOOPBACK_HOSTS:
+        await ws.close(code=4403)
+        return
+
+    await ws.accept()
+
+    # --- spawn PTY ------------------------------------------------------
+    resume = ws.query_params.get("resume") or None
+    channel = _channel_or_close_code(ws)
+    sidecar_url = _build_sidecar_url(channel) if channel else None
+
+    try:
+        argv, cwd, env = _resolve_chat_argv(resume=resume, sidecar_url=sidecar_url)
+    except SystemExit as exc:
+        # _make_tui_argv calls sys.exit(1) when node/npm is missing.
+        await ws.send_text(f"\r\n\x1b[31mChat unavailable: {exc}\x1b[0m\r\n")
+        await ws.close(code=1011)
+        return
+
+
+    try:
+        bridge = PtyBridge.spawn(argv, cwd=cwd, env=env)
+    except PtyUnavailableError as exc:
+        await ws.send_text(f"\r\n\x1b[31mChat unavailable: {exc}\x1b[0m\r\n")
+        await ws.close(code=1011)
+        return
+    except (FileNotFoundError, OSError) as exc:
+        await ws.send_text(f"\r\n\x1b[31mChat failed to start: {exc}\x1b[0m\r\n")
+        await ws.close(code=1011)
+        return
+
+    loop = asyncio.get_running_loop()
+
+    # --- reader task: PTY master → WebSocket ----------------------------
+    async def pump_pty_to_ws() -> None:
+        while True:
+            chunk = await loop.run_in_executor(
+                None, bridge.read, _PTY_READ_CHUNK_TIMEOUT
+            )
+            if chunk is None:  # EOF
+                return
+            if not chunk:  # no data this tick; yield control and retry
+                await asyncio.sleep(0)
+                continue
+            try:
+                await ws.send_bytes(chunk)
+            except Exception:
+                return
+
+    reader_task = asyncio.create_task(pump_pty_to_ws())
+
+    # --- writer loop: WebSocket → PTY master ----------------------------
+    try:
+        while True:
+            msg = await ws.receive()
+            msg_type = msg.get("type")
+            if msg_type == "websocket.disconnect":
+                break
+            raw = msg.get("bytes")
+            if raw is None:
+                text = msg.get("text")
+                raw = text.encode("utf-8") if isinstance(text, str) else b""
+            if not raw:
+                continue
+
+            # Resize escape is consumed locally, never written to the PTY.
+            match = _RESIZE_RE.match(raw)
+            if match and match.end() == len(raw):
+                cols = int(match.group(1))
+                rows = int(match.group(2))
+                bridge.resize(cols=cols, rows=rows)
+                continue
+
+            bridge.write(raw)
+    except WebSocketDisconnect:
+        pass
+    finally:
+        reader_task.cancel()
+        try:
+            await reader_task
+        except (asyncio.CancelledError, Exception):
+            pass
+        bridge.close()
+
+
+# ---------------------------------------------------------------------------
+# /api/ws — JSON-RPC WebSocket sidecar for the dashboard "Chat" tab.
+#
+# Drives the same `tui_gateway.dispatch` surface Ink uses over stdio, so the
+# dashboard can render structured metadata (model badge, tool-call sidebar,
+# slash launcher, session info) alongside the xterm.js terminal that PTY
+# already paints. Both transports bind to the same session id when one is
+# active, so a tool.start emitted by the agent fans out to both sinks.
+# ---------------------------------------------------------------------------
+
+
+@app.websocket("/api/ws")
+async def gateway_ws(ws: WebSocket) -> None:
+    if not _DASHBOARD_EMBEDDED_CHAT_ENABLED:
+        await ws.close(code=4403)
+        return
+
+    token = ws.query_params.get("token", "")
+    if not hmac.compare_digest(token.encode(), _SESSION_TOKEN.encode()):
+        await ws.close(code=4401)
+        return
+
+    client_host = ws.client.host if ws.client else ""
+    if client_host and client_host not in _LOOPBACK_HOSTS:
+        await ws.close(code=4403)
+        return
+
+    from tui_gateway.ws import handle_ws
+
+    await handle_ws(ws)
+
+
+# ---------------------------------------------------------------------------
+# /api/pub + /api/events — chat-tab event broadcast.
+#
+# The PTY-side ``tui_gateway.entry`` opens /api/pub at startup (driven by
+# HERMES_TUI_SIDECAR_URL set in /api/pty's PTY env) and writes every
+# dispatcher emit through it.  The dashboard fans those frames out to any
+# subscriber that opened /api/events on the same channel id.  This is what
+# gives the React sidebar its tool-call feed without breaking the PTY
+# child's stdio handshake with Ink.
+# ---------------------------------------------------------------------------
+
+
+@app.websocket("/api/pub")
+async def pub_ws(ws: WebSocket) -> None:
+    if not _DASHBOARD_EMBEDDED_CHAT_ENABLED:
+        await ws.close(code=4403)
+        return
+
+    token = ws.query_params.get("token", "")
+    if not hmac.compare_digest(token.encode(), _SESSION_TOKEN.encode()):
+        await ws.close(code=4401)
+        return
+
+    client_host = ws.client.host if ws.client else ""
+    if client_host and client_host not in _LOOPBACK_HOSTS:
+        await ws.close(code=4403)
+        return
+
+    channel = _channel_or_close_code(ws)
+    if not channel:
+        await ws.close(code=4400)
+        return
+
+    await ws.accept()
+
+    try:
+        while True:
+            await _broadcast_event(channel, await ws.receive_text())
+    except WebSocketDisconnect:
+        pass
+
+
+@app.websocket("/api/events")
+async def events_ws(ws: WebSocket) -> None:
+    if not _DASHBOARD_EMBEDDED_CHAT_ENABLED:
+        await ws.close(code=4403)
+        return
+
+    token = ws.query_params.get("token", "")
+    if not hmac.compare_digest(token.encode(), _SESSION_TOKEN.encode()):
+        await ws.close(code=4401)
+        return
+
+    client_host = ws.client.host if ws.client else ""
+    if client_host and client_host not in _LOOPBACK_HOSTS:
+        await ws.close(code=4403)
+        return
+
+    channel = _channel_or_close_code(ws)
+    if not channel:
+        await ws.close(code=4400)
+        return
+
+    await ws.accept()
+
+    async with _event_lock:
+        _event_channels.setdefault(channel, set()).add(ws)
+
+    try:
+        while True:
+            # Subscribers don't speak — the receive() just blocks until
+            # disconnect so the connection stays open as long as the
+            # browser holds it.
+            await ws.receive_text()
+    except WebSocketDisconnect:
+        pass
+    finally:
+        async with _event_lock:
+            subs = _event_channels.get(channel)
+
+            if subs is not None:
+                subs.discard(ws)
+
+                if not subs:
+                    _event_channels.pop(channel, None)
+
+
 def mount_spa(application: FastAPI):
    """Mount the built SPA. Falls back to index.html for client-side routing.

@ -2284,8 +2611,10 @@ def mount_spa(application: FastAPI):
    def _serve_index():
        """Return index.html with the session token injected."""
        html = _index_path.read_text()
+        chat_js = "true" if _DASHBOARD_EMBEDDED_CHAT_ENABLED else "false"
        token_script = (
-            f'<script>window.__HERMES_SESSION_TOKEN__="{_SESSION_TOKEN}";</script>'
+            f'<script>window.__HERMES_SESSION_TOKEN__="{_SESSION_TOKEN}";'
+            f"window.__HERMES_DASHBOARD_EMBEDDED_CHAT__={chat_js};</script>"
        )
        html = html.replace("</head>", f"{token_script}</head>", 1)
        return HTMLResponse(
@ -2798,10 +3127,15 @@ def start_server(
    port: int = 9119,
    open_browser: bool = True,
    allow_public: bool = False,
+    *,
+    embedded_chat: bool = False,
 ):
    """Start the web UI server."""
    import uvicorn

+    global _DASHBOARD_EMBEDDED_CHAT_ENABLED
+    _DASHBOARD_EMBEDDED_CHAT_ENABLED = embedded_chat
+
    _LOCALHOST = ("127.0.0.1", "localhost", "::1")
    if host not in _LOCALHOST and not allow_public:
        raise SystemExit(
@ -2817,7 +3151,10 @@ def start_server(

    # Record the bound host so host_header_middleware can validate incoming
    # Host headers against it. Defends against DNS rebinding (GHSA-ppp5-vxwm-4cf7).
+    # bound_port is also stashed so /api/pty can build the back-WS URL the
+    # PTY child uses to publish events to the dashboard sidebar.
    app.state.bound_host = host
+    app.state.bound_port = port

    if open_browser:
        import webbrowser
--- a/pyproject.toml
+++ b/pyproject.toml
@ -78,6 +78,7 @@ termux = [
 ]
 dingtalk = ["dingtalk-stream>=0.20,<1", "alibabacloud-dingtalk>=2.0.0", "qrcode>=7.0,<8"]
 feishu = ["lark-oapi>=1.5.3,<2", "qrcode>=7.0,<8"]
+# `hermes dashboard` (localhost SPA + API).  Not in core to keep the default install lean.
 web = ["fastapi>=0.104.0,<1", "uvicorn[standard]>=0.24.0,<1"]
 rl = [
  "atroposlib @ git+https://github.com/NousResearch/atropos.git@c20c85256e5a45ad31edf8b7276e9c5ee1995a30",
--- a/skills/mlops/models/segment-anything/SKILL.md
+++ b/skills/mlops/models/segment-anything/SKILL.md
@ -134,6 +134,7 @@ masks = processor.image_processor.post_process_masks(

 ### Model architecture

+<!-- ascii-guard-ignore -->
 ```
 SAM Architecture:
 ┌─────────────────┐     ┌─────────────────┐     ┌─────────────────┐
@ -144,6 +145,7 @@ SAM Architecture:
   Image Embeddings      Prompt Embeddings         Masks + IoU
   (computed once)       (per prompt)             predictions
 ```
+<!-- ascii-guard-ignore-end -->

 ### Model variants

--- a/skills/research/research-paper-writing/SKILL.md
+++ b/skills/research/research-paper-writing/SKILL.md
@ -22,6 +22,7 @@ End-to-end pipeline for producing publication-ready ML/AI research papers target

 This is **not a linear pipeline** — it is an iterative loop. Results trigger new experiments. Reviews trigger new analysis. The agent must handle these feedback loops.

+<!-- ascii-guard-ignore -->
 ```
 ┌─────────────────────────────────────────────────────────────┐
 │                    RESEARCH PAPER PIPELINE                  │
@ -41,6 +42,7 @@ This is **not a linear pipeline** — it is an iterative loop. Results trigger n
 │                                                             │
 └─────────────────────────────────────────────────────────────┘
 ```
+<!-- ascii-guard-ignore-end -->

 ---

--- a/tests/hermes_cli/test_pty_bridge.py
+++ b/tests/hermes_cli/test_pty_bridge.py
@ -0,0 +1,172 @@
+"""Unit tests for hermes_cli.pty_bridge — PTY spawning + byte forwarding.
+
+These tests drive the bridge with minimal POSIX processes (echo, env, sleep,
+printf) to verify it behaves like a PTY you can read/write/resize/close.
+"""
+
+from __future__ import annotations
+
+import os
+import sys
+import time
+
+import pytest
+
+pytest.importorskip("ptyprocess", reason="ptyprocess not installed")
+
+from hermes_cli.pty_bridge import PtyBridge, PtyUnavailableError
+
+
+skip_on_windows = pytest.mark.skipif(
+    sys.platform.startswith("win"), reason="PTY bridge is POSIX-only"
+)
+
+
+def _read_until(bridge: PtyBridge, needle: bytes, timeout: float = 5.0) -> bytes:
+    """Accumulate PTY output until we see `needle` or time out."""
+    deadline = time.monotonic() + timeout
+    buf = bytearray()
+    while time.monotonic() < deadline:
+        chunk = bridge.read(timeout=0.2)
+        if chunk is None:
+            break
+        buf.extend(chunk)
+        if needle in buf:
+            return bytes(buf)
+    return bytes(buf)
+
+
+@skip_on_windows
+class TestPtyBridgeSpawn:
+    def test_is_available_on_posix(self):
+        assert PtyBridge.is_available() is True
+
+    def test_spawn_returns_bridge_with_pid(self):
+        bridge = PtyBridge.spawn(["true"])
+        try:
+            assert bridge.pid > 0
+        finally:
+            bridge.close()
+
+    def test_spawn_raises_on_missing_argv0(self, tmp_path):
+        with pytest.raises((FileNotFoundError, OSError)):
+            PtyBridge.spawn([str(tmp_path / "definitely-not-a-real-binary")])
+
+
+@skip_on_windows
+class TestPtyBridgeIO:
+    def test_reads_child_stdout(self):
+        bridge = PtyBridge.spawn(["/bin/sh", "-c", "printf hermes-ok"])
+        try:
+            output = _read_until(bridge, b"hermes-ok")
+            assert b"hermes-ok" in output
+        finally:
+            bridge.close()
+
+    def test_write_sends_to_child_stdin(self):
+        # `cat` with no args echoes stdin back to stdout.  We write a line,
+        # read it back, then let close() in the finally block terminate cat.
+        bridge = PtyBridge.spawn(["/bin/cat"])
+        try:
+            bridge.write(b"hello-pty\n")
+            output = _read_until(bridge, b"hello-pty")
+            assert b"hello-pty" in output
+        finally:
+            bridge.close()
+
+    def test_read_returns_none_after_child_exits(self):
+        bridge = PtyBridge.spawn(["/bin/sh", "-c", "printf done"])
+        try:
+            _read_until(bridge, b"done")
+            # Give the child a beat to exit cleanly, then drain until EOF.
+            deadline = time.monotonic() + 3.0
+            while bridge.is_alive() and time.monotonic() < deadline:
+                bridge.read(timeout=0.1)
+            # Next reads after exit should return None (EOF), not raise.
+            got_none = False
+            for _ in range(10):
+                if bridge.read(timeout=0.1) is None:
+                    got_none = True
+                    break
+            assert got_none, "PtyBridge.read did not return None after child EOF"
+        finally:
+            bridge.close()
+
+
+@skip_on_windows
+class TestPtyBridgeResize:
+    def test_resize_updates_child_winsize(self):
+        # tput reads COLUMNS/LINES from the TTY ioctl (TIOCGWINSZ).
+        # Spawn a shell, resize, then ask tput for the dimensions.
+        bridge = PtyBridge.spawn(
+            ["/bin/sh", "-c", "sleep 0.1; tput cols; tput lines"],
+            cols=80,
+            rows=24,
+        )
+        try:
+            bridge.resize(cols=123, rows=45)
+            output = _read_until(bridge, b"45", timeout=5.0)
+            # tput prints just the numbers, one per line
+            assert b"123" in output
+            assert b"45" in output
+        finally:
+            bridge.close()
+
+
+@skip_on_windows
+class TestPtyBridgeClose:
+    def test_close_is_idempotent(self):
+        bridge = PtyBridge.spawn(["/bin/sh", "-c", "sleep 30"])
+        bridge.close()
+        bridge.close()  # must not raise
+        assert not bridge.is_alive()
+
+    def test_close_terminates_long_running_child(self):
+        bridge = PtyBridge.spawn(["/bin/sh", "-c", "sleep 30"])
+        pid = bridge.pid
+        bridge.close()
+        # Give the kernel a moment to reap
+        deadline = time.monotonic() + 3.0
+        reaped = False
+        while time.monotonic() < deadline:
+            try:
+                os.kill(pid, 0)
+                time.sleep(0.05)
+            except ProcessLookupError:
+                reaped = True
+                break
+        assert reaped, f"pid {pid} still running after close()"
+
+
+@skip_on_windows
+class TestPtyBridgeEnv:
+    def test_cwd_is_respected(self, tmp_path):
+        bridge = PtyBridge.spawn(
+            ["/bin/sh", "-c", "pwd"],
+            cwd=str(tmp_path),
+        )
+        try:
+            output = _read_until(bridge, str(tmp_path).encode())
+            assert str(tmp_path).encode() in output
+        finally:
+            bridge.close()
+
+    def test_env_is_forwarded(self):
+        bridge = PtyBridge.spawn(
+            ["/bin/sh", "-c", "printf %s \"$HERMES_PTY_TEST\""],
+            env={**os.environ, "HERMES_PTY_TEST": "pty-env-works"},
+        )
+        try:
+            output = _read_until(bridge, b"pty-env-works")
+            assert b"pty-env-works" in output
+        finally:
+            bridge.close()
+
+
+class TestPtyBridgeUnavailable:
+    """Platform fallback semantics — PtyUnavailableError is importable and
+    carries a user-readable message."""
+
+    def test_error_carries_user_message(self):
+        err = PtyUnavailableError("platform not supported")
+        assert "platform" in str(err)
--- a/tests/hermes_cli/test_web_server.py
+++ b/tests/hermes_cli/test_web_server.py
@ -1677,3 +1677,251 @@ class TestDashboardPluginManifestExtensions:
        plugins = web_server._get_dashboard_plugins(force_rescan=True)
        entry = next(p for p in plugins if p["name"] == "mixed-slots")
        assert entry["slots"] == ["sidebar", "header-right"]
+
+
+# ---------------------------------------------------------------------------
+# /api/pty WebSocket — terminal bridge for the dashboard "Chat" tab.
+#
+# These tests drive the endpoint with a tiny fake command (typically ``cat``
+# or ``sh -c 'printf …'``) instead of the real ``hermes --tui`` binary.  The
+# endpoint resolves its argv through ``_resolve_chat_argv``, so tests
+# monkeypatch that hook.
+# ---------------------------------------------------------------------------
+
+import sys
+
+
+# Class-level marker: skip wherever ``sys.platform`` starts with "win".
+skip_on_windows = pytest.mark.skipif(
+    sys.platform.startswith("win"),
+    reason="PTY bridge is POSIX-only",
+)
+
+
+@skip_on_windows
+class TestPtyWebSocket:
+    """WebSocket tests for ``/api/pty``, ``/api/pub`` and ``/api/events``.
+
+    No test spawns the real TUI: each one that reaches the PTY stage swaps
+    ``web_server._resolve_chat_argv`` for a stub returning a tiny shell
+    command (see :meth:`_fake_argv`).
+    """
+
+    @pytest.fixture(autouse=True)
+    def _setup(self, monkeypatch, _isolate_hermes_home):
+        from starlette.testclient import TestClient
+
+        import hermes_cli.web_server as ws
+
+        # Avoid exec'ing the actual TUI in tests: every test below installs
+        # its own fake argv via ``ws._resolve_chat_argv``.
+        self.ws_module = ws
+        monkeypatch.setattr(ws, "_DASHBOARD_EMBEDDED_CHAT_ENABLED", True)
+        self.token = ws._SESSION_TOKEN
+        self.client = TestClient(ws.app)
+
+    # -- helpers -----------------------------------------------------------
+
+    def _url(self, token: str | None = None, **params: str) -> str:
+        """Return an ``/api/pty`` path carrying *token* (default: the session
+        token) plus any extra query *params*."""
+        tok = token if token is not None else self.token
+        # TestClient.websocket_connect takes the path; it reconstructs the
+        # query string, so we pass it inline.
+        from urllib.parse import urlencode
+
+        q = {"token": tok, **params}
+        return f"/api/pty?{urlencode(q)}"
+
+    def _fake_argv(self, monkeypatch, argv, captured=None):
+        """Stub ``_resolve_chat_argv`` so it returns ``(argv, None, None)``.
+
+        When *captured* is a dict, the stub also records the ``resume`` and
+        ``sidecar_url`` arguments it was called with under those keys.
+        """
+
+        def fake_resolve(resume=None, sidecar_url=None):
+            if captured is not None:
+                captured["resume"] = resume
+                captured["sidecar_url"] = sidecar_url
+            return (argv, None, None)
+
+        monkeypatch.setattr(self.ws_module, "_resolve_chat_argv", fake_resolve)
+
+    def _assert_closes_with(self, path: str, code: int) -> None:
+        """Connect to *path* and assert the server closes with *code*."""
+        from starlette.websockets import WebSocketDisconnect
+
+        with pytest.raises(WebSocketDisconnect) as exc:
+            with self.client.websocket_connect(path):
+                pass
+        assert exc.value.code == code
+
+    @staticmethod
+    def _drain_until(conn, *needles: bytes, timeout: float = 5.0) -> bytes:
+        """Collect binary frames from *conn* until every needle has appeared.
+
+        Stops early when ``receive_bytes`` raises (for example the child
+        exited and the server closed the socket) so the caller's assertion
+        reports the bytes actually received rather than an unrelated
+        disconnect error.  ``receive_bytes`` blocks, so *timeout* is only
+        checked between frames.
+        """
+        import time
+
+        buf = b""
+        deadline = time.monotonic() + timeout
+        while time.monotonic() < deadline:
+            try:
+                frame = conn.receive_bytes()
+            except Exception:
+                break
+            if frame:
+                buf += frame
+            if all(needle in buf for needle in needles):
+                break
+        return buf
+
+    # -- auth / feature gate -------------------------------------------------
+
+    def test_rejects_when_embedded_chat_disabled(self, monkeypatch):
+        monkeypatch.setattr(self.ws_module, "_DASHBOARD_EMBEDDED_CHAT_ENABLED", False)
+        self._assert_closes_with(self._url(), 4403)
+
+    def test_rejects_missing_token(self, monkeypatch):
+        self._fake_argv(monkeypatch, ["/bin/cat"])
+        self._assert_closes_with("/api/pty", 4401)
+
+    def test_rejects_bad_token(self, monkeypatch):
+        self._fake_argv(monkeypatch, ["/bin/cat"])
+        self._assert_closes_with(self._url(token="wrong"), 4401)
+
+    # -- PTY data path -------------------------------------------------------
+
+    def test_streams_child_stdout_to_client(self, monkeypatch):
+        self._fake_argv(monkeypatch, ["/bin/sh", "-c", "printf hermes-ws-ok"])
+        with self.client.websocket_connect(self._url()) as conn:
+            buf = self._drain_until(conn, b"hermes-ws-ok")
+            assert b"hermes-ws-ok" in buf
+
+    def test_client_input_reaches_child_stdin(self, monkeypatch):
+        # ``cat`` echoes stdin back, so a write → read round-trip proves
+        # the full duplex path.
+        self._fake_argv(monkeypatch, ["/bin/cat"])
+        with self.client.websocket_connect(self._url()) as conn:
+            conn.send_bytes(b"round-trip-payload\n")
+            buf = self._drain_until(conn, b"round-trip-payload")
+            assert b"round-trip-payload" in buf
+
+    def test_resize_escape_is_forwarded(self, monkeypatch):
+        # Resize escape gets intercepted and applied via TIOCSWINSZ,
+        # then ``tput cols/lines`` reports the new dimensions back.
+        # The sleep gives the test time to push the resize before tput runs.
+        self._fake_argv(
+            monkeypatch,
+            ["/bin/sh", "-c", "sleep 0.15; tput cols; tput lines"],
+        )
+        with self.client.websocket_connect(self._url()) as conn:
+            conn.send_text("\x1b[RESIZE:99;41]")
+            buf = self._drain_until(conn, b"99", b"41")
+            assert b"99" in buf and b"41" in buf
+
+    def test_unavailable_platform_closes_with_message(self, monkeypatch):
+        from hermes_cli.pty_bridge import PtyUnavailableError
+
+        def _raise(cls, *args, **kwargs):
+            raise PtyUnavailableError("pty missing for tests")
+
+        self._fake_argv(monkeypatch, ["/bin/cat"])
+        # Patch PtyBridge.spawn at the web_server module's binding.
+        monkeypatch.setattr(self.ws_module.PtyBridge, "spawn", classmethod(_raise))
+
+        with self.client.websocket_connect(self._url()) as conn:
+            # Expect a final text frame with the error message, then close.
+            lowered = conn.receive_text().lower()
+            assert "pty" in lowered or "unavailable" in lowered
+
+    # -- query parameters ----------------------------------------------------
+
+    def test_resume_parameter_is_forwarded_to_argv(self, monkeypatch):
+        captured: dict = {}
+        self._fake_argv(
+            monkeypatch, ["/bin/sh", "-c", "printf resume-arg-ok"], captured
+        )
+
+        with self.client.websocket_connect(self._url(resume="sess-42")) as conn:
+            # Drain briefly so the handler actually invokes the resolver.
+            try:
+                conn.receive_bytes()
+            except Exception:
+                pass
+        assert captured.get("resume") == "sess-42"
+
+    def test_channel_param_propagates_sidecar_url(self, monkeypatch):
+        """When /api/pty is opened with ?channel=, the PTY child gets a
+        HERMES_TUI_SIDECAR_URL env var pointing back at /api/pub on the
+        same channel — which is how tool events reach the dashboard sidebar."""
+        captured: dict = {}
+        self._fake_argv(
+            monkeypatch, ["/bin/sh", "-c", "printf sidecar-ok"], captured
+        )
+        monkeypatch.setattr(
+            self.ws_module.app.state, "bound_host", "127.0.0.1", raising=False
+        )
+        monkeypatch.setattr(
+            self.ws_module.app.state, "bound_port", 9119, raising=False
+        )
+
+        with self.client.websocket_connect(self._url(channel="abc-123")) as conn:
+            try:
+                conn.receive_bytes()
+            except Exception:
+                pass
+
+        url = captured.get("sidecar_url") or ""
+        assert url.startswith("ws://127.0.0.1:9119/api/pub?")
+        assert "channel=abc-123" in url
+        assert "token=" in url
+
+    # -- /api/pub and /api/events -------------------------------------------
+
+    def test_pub_broadcasts_to_events_subscribers(self, monkeypatch):
+        """Frame written to /api/pub is rebroadcast verbatim to every
+        /api/events subscriber on the same channel."""
+        from urllib.parse import urlencode
+
+        qs = urlencode({"token": self.token, "channel": "broadcast-test"})
+        pub_path = f"/api/pub?{qs}"
+        sub_path = f"/api/events?{qs}"
+
+        with self.client.websocket_connect(sub_path) as sub:
+            with self.client.websocket_connect(pub_path) as pub:
+                pub.send_text('{"type":"tool.start","payload":{"tool_id":"t1"}}')
+                received = sub.receive_text()
+
+        assert "tool.start" in received
+        assert '"tool_id":"t1"' in received
+
+    def test_events_rejects_missing_channel(self):
+        self._assert_closes_with(f"/api/events?token={self.token}", 4400)
--- a/tui_gateway/entry.py
+++ b/tui_gateway/entry.py
@ -5,7 +5,28 @@ import sys
 import time
 import traceback

+from tui_gateway import server
 from tui_gateway.server import _CRASH_LOG, dispatch, resolve_skin, write_json
+from tui_gateway.transport import TeeTransport
+
+
+def _install_sidecar_publisher() -> None:
+    """Tee gateway output to the dashboard when a sidecar URL is configured.
+
+    Reads ``HERMES_TUI_SIDECAR_URL`` (set by the dashboard's ``/api/pty``
+    endpoint when a chat tab passes a ``channel`` query param).  When it is
+    unset or empty nothing changes.  Otherwise ``server._stdio_transport``
+    is replaced by a ``TeeTransport`` whose primary is the previous stdio
+    transport and whose secondary is a ``WsPublisherTransport`` for that URL.
+    """
+    sidecar_url = os.environ.get("HERMES_TUI_SIDECAR_URL")
+    if sidecar_url:
+        # Imported lazily so the publisher module is only loaded when used.
+        from tui_gateway.event_publisher import WsPublisherTransport
+
+        stdio = server._stdio_transport
+        publisher = WsPublisherTransport(sidecar_url)
+        server._stdio_transport = TeeTransport(stdio, publisher)


 def _log_signal(signum: int, frame) -> None:
@ -82,6 +103,8 @@ def _log_exit(reason: str) -> None:


 def main():
+    _install_sidecar_publisher()
+
    if not write_json({
        "jsonrpc": "2.0",
        "method": "event",
--- a/tui_gateway/event_publisher.py
+++ b/tui_gateway/event_publisher.py
@ -0,0 +1,126 @@
+"""Best-effort WebSocket publisher transport for the PTY-side gateway.
+
+The dashboard's `/api/pty` spawns `hermes --tui` as a child process, which
+spawns its own ``tui_gateway.entry``.  Tool/reasoning/status events fire on
+*that* gateway's transport — three processes removed from the dashboard
+server itself.  To surface them in the dashboard sidebar (`/api/events`),
+the PTY-side gateway opens a back-WS to the dashboard at startup and
+mirrors every emit through this transport.
+
+Wire protocol: one JSON-encoded dict per WebSocket text frame, with no
+trailing newline (the same dict the dispatcher already passes to ``write``).
+No extra envelope is added here — the dashboard's ``/api/pub`` endpoint just
+rebroadcasts each frame verbatim to subscribers.
+
+Failure mode: silent.  The agent loop must never block waiting for the
+sidecar to drain.  A dead WS short-circuits all subsequent writes.
+Actual ``send`` calls run on a daemon thread so the TeeTransport's
+``write`` returns after enqueueing (best-effort; drop when the queue is full).
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import queue
+import threading
+from typing import Optional
+
+try:
+    from websockets.sync.client import connect as ws_connect
+except ImportError:  # pragma: no cover - websockets is a required install path
+    ws_connect = None  # type: ignore[assignment]
+
+_log = logging.getLogger(__name__)
+
+_DRAIN_STOP = object()
+
+_QUEUE_MAX = 256
+
+
+class WsPublisherTransport:
+    """Best-effort transport that forwards frames to a WebSocket URL.
+
+    The connection is opened synchronously in ``__init__``.  If the
+    ``websockets`` client could not be imported or the connect fails, the
+    instance starts out dead and every ``write`` returns ``False``.
+    Otherwise ``write`` serialises the frame and enqueues it (bounded by
+    ``_QUEUE_MAX``); a daemon thread performs the actual ``send`` so callers
+    never wait on the socket.
+    """
+
+    __slots__ = ("_url", "_lock", "_ws", "_dead", "_q", "_worker")
+
+    def __init__(self, url: str, *, connect_timeout: float = 2.0) -> None:
+        """Connect to *url*, waiting at most *connect_timeout* seconds."""
+        self._url = url
+        # Serialises ``send`` on the worker against ``close`` on the caller.
+        self._lock = threading.Lock()
+        self._ws: Optional[object] = None
+        self._dead = False
+        self._q: queue.Queue[object] = queue.Queue(maxsize=_QUEUE_MAX)
+        self._worker: Optional[threading.Thread] = None
+
+        if ws_connect is None:
+            self._dead = True
+
+            return
+
+        try:
+            self._ws = ws_connect(url, open_timeout=connect_timeout, max_size=None)
+        except Exception as exc:
+            _log.debug("event publisher connect failed: %s", exc)
+            self._dead = True
+            self._ws = None
+
+            return
+
+        self._worker = threading.Thread(
+            target=self._drain,
+            name="hermes-ws-pub",
+            daemon=True,
+        )
+        self._worker.start()
+
+    def _release_ws(self) -> None:
+        """Forget the socket and close it, logging (not raising) close errors."""
+        ws, self._ws = self._ws, None
+        if ws is None:
+            return
+        try:
+            ws.close()  # type: ignore[attr-defined]
+        except Exception as exc:
+            _log.debug("event publisher close failed: %s", exc)
+
+    def _request_stop(self) -> None:
+        """Queue the stop sentinel, evicting pending frames if the queue is full.
+
+        Called only after ``_dead`` is set, so producers have stopped; the
+        attempt count is bounded anyway to rule out spinning.
+        """
+        for _ in range(_QUEUE_MAX + 1):
+            try:
+                self._q.put_nowait(_DRAIN_STOP)
+                return
+            except queue.Full:
+                try:
+                    self._q.get_nowait()
+                except queue.Empty:
+                    pass
+
+    def _drain(self) -> None:
+        """Worker loop: send queued lines until the stop sentinel arrives."""
+        while True:
+            item = self._q.get()
+            if item is _DRAIN_STOP:
+                return
+            if not isinstance(item, str) or self._ws is None:
+                continue
+            try:
+                with self._lock:
+                    if self._ws is not None:
+                        self._ws.send(item)  # type: ignore[union-attr]
+            except Exception as exc:
+                _log.debug("event publisher write failed: %s", exc)
+                self._dead = True
+                # Close the broken socket instead of merely dropping the
+                # reference, so the connection is not leaked.
+                self._release_ws()
+
+    def write(self, obj: dict) -> bool:
+        """Enqueue *obj* for sending.
+
+        Returns ``False`` without queueing when the transport is dead or the
+        queue is full; ``True`` means queued, not delivered.
+        """
+        if self._dead or self._ws is None or self._worker is None:
+            return False
+
+        line = json.dumps(obj, ensure_ascii=False)
+
+        try:
+            self._q.put_nowait(line)
+
+            return True
+        except queue.Full:
+            return False
+
+    def close(self) -> None:
+        """Stop the worker (waiting up to 3 s) and close the socket."""
+        self._dead = True
+        w = self._worker
+        if w is not None and w.is_alive():
+            self._request_stop()
+            w.join(timeout=3.0)
+        self._worker = None
+
+        # Bounded wait on our own lock: if the worker is stuck inside
+        # ``send`` it still holds the lock, and close() must not hang on it.
+        locked = self._lock.acquire(timeout=1.0)
+        try:
+            self._release_ws()
+        finally:
+            if locked:
+                self._lock.release()
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@ -1,5 +1,6 @@
 import atexit
 import concurrent.futures
+import contextvars
 import copy
 import json
 import logging
@ -12,9 +13,17 @@ import time
 import uuid
 from datetime import datetime
 from pathlib import Path
+from typing import Optional

 from hermes_constants import get_hermes_home
 from hermes_cli.env_loader import load_hermes_dotenv
+from tui_gateway.transport import (
+    StdioTransport,
+    Transport,
+    bind_transport,
+    current_transport,
+    reset_transport,
+)

 logger = logging.getLogger(__name__)

@ -147,6 +156,11 @@ atexit.register(lambda: _pool.shutdown(wait=False, cancel_futures=True))
 _real_stdout = sys.stdout
 sys.stdout = sys.stderr

+# Module-level stdio transport — fallback sink when no transport is bound via
+# contextvar or session. Stream resolved through a lambda so runtime monkey-
+# patches of `_real_stdout` (used extensively in tests) still land correctly.
+_stdio_transport = StdioTransport(lambda: _real_stdout, _stdout_lock)
+

 class _SlashWorker:
    """Persistent HermesCLI subprocess for slash commands."""
@ -266,14 +280,24 @@ def _db_unavailable_error(rid, *, code: int):


 def write_json(obj: dict) -> bool:
-    line = json.dumps(obj, ensure_ascii=False) + "\n"
-    try:
-        with _stdout_lock:
-            _real_stdout.write(line)
-            _real_stdout.flush()
-        return True
-    except BrokenPipeError:
-        return False
+    """Emit one JSON frame. Routes via the most-specific transport available.
+
+    Precedence:
+
+    1. Event frames with a session id → the transport stored on that session,
+       so async events land with the client that owns the session even if
+       the emitting thread has no contextvar binding.
+    2. Otherwise the transport bound on the current context (set by
+       :func:`dispatch` for the lifetime of a request).
+    3. Otherwise the module-level stdio transport, matching the historical
+       behaviour and keeping tests that monkey-patch ``_real_stdout`` green.
+
+    Returns whatever the selected transport's ``write`` returns.
+    """
+    # Session routing applies to event frames only.  A missing/None "params",
+    # an empty session id, an unknown session, or a session without a
+    # transport all fall through to the context/stdio path below.
+    if obj.get("method") == "event":
+        sid = ((obj.get("params") or {}).get("session_id")) or ""
+        if sid and (t := (_sessions.get(sid) or {}).get("transport")) is not None:
+            return t.write(obj)
+
+    return (current_transport() or _stdio_transport).write(obj)


 def _emit(event: str, sid: str, payload: dict | None = None):
@ -343,27 +367,40 @@ def handle_request(req: dict) -> dict | None:
    return fn(req.get("id"), req.get("params", {}))


-def dispatch(req: dict) -> dict | None:
+def dispatch(req: dict, transport: Optional[Transport] = None) -> dict | None:
    """Route inbound RPCs — long handlers to the pool, everything else inline.

    Returns a response dict when handled inline. Returns None when the
-    handler was scheduled on the pool; the worker writes its own
-    response via write_json when done.
+    handler was scheduled on the pool; the worker writes its own response
+    via the bound transport when done.
+
+    *transport* (optional): pins every write produced by this request —
+    including any events emitted by the handler — to the given transport.
+    Omitting it falls back to the module-level stdio transport, preserving
+    the original behaviour for ``tui_gateway.entry``.
    """
-    if req.get("method") not in _LONG_HANDLERS:
-        return handle_request(req)
+    # Bind for the duration of this call only; the ``finally`` below restores
+    # whatever binding was active before.
+    t = transport or _stdio_transport
+    token = bind_transport(t)
+    try:
+        if req.get("method") not in _LONG_HANDLERS:
+            return handle_request(req)

-    def run():
-        try:
-            resp = handle_request(req)
-        except Exception as exc:
-            resp = _err(req.get("id"), -32000, f"handler error: {exc}")
-        if resp is not None:
-            write_json(resp)
+        # Snapshot the context so the pool worker sees the bound transport.
+        ctx = contextvars.copy_context()

-    _pool.submit(run)
+        def run():
+            try:
+                resp = handle_request(req)
+            except Exception as exc:
+                resp = _err(req.get("id"), -32000, f"handler error: {exc}")
+            if resp is not None:
+                # The response goes straight to the captured transport
+                # instead of through write_json's routing.
+                t.write(resp)

-    return None
+        _pool.submit(lambda: ctx.run(run))
+
+        return None
+    finally:
+        reset_transport(token)


 def _wait_agent(session: dict, rid: str, timeout: float = 30.0) -> dict | None:
@ -1262,6 +1299,9 @@ def _init_session(sid: str, key: str, agent, history: list, cols: int = 80):
        "tool_progress_mode": _load_tool_progress_mode(),
        "edit_snapshots": {},
        "tool_started_at": {},
+        # Pin async event emissions to whichever transport created the
+        # session (stdio for Ink, JSON-RPC WS for the dashboard sidebar).
+        "transport": current_transport() or _stdio_transport,
    }
    try:
        _sessions[sid]["slash_worker"] = _SlashWorker(
@ -1404,6 +1444,7 @@ def _(rid, params: dict) -> dict:
        "slash_worker": None,
        "tool_progress_mode": _load_tool_progress_mode(),
        "tool_started_at": {},
+        "transport": current_transport() or _stdio_transport,
    }

    def _build() -> None:
--- a/tui_gateway/transport.py
+++ b/tui_gateway/transport.py
@ -0,0 +1,127 @@
+"""Transport abstraction for the tui_gateway JSON-RPC server.
+
+Historically the gateway wrote every JSON frame directly to real stdout.  This
+module decouples the I/O sink from the handler logic so the same dispatcher
+can be driven over stdio (``tui_gateway.entry``) or WebSocket
+(``tui_gateway.ws``) without duplicating code.
+
+A :class:`Transport` is anything that can accept a JSON-serialisable dict and
+forward it to its peer.  The active transport for the current request is
+tracked in a :class:`contextvars.ContextVar` so handlers — including those
+dispatched onto the worker pool — route their writes to the right peer.
+
+Backward compatibility
+----------------------
+``tui_gateway.server.write_json`` still works without any transport bound.
+When nothing is on the contextvar and no session-level transport is found,
+it falls back to the module-level :class:`StdioTransport`, which wraps the
+original ``_real_stdout`` + ``_stdout_lock`` pair.  Tests that monkey-patch
+``server._real_stdout`` continue to work because the stdio transport resolves
+the stream lazily through a callback.
+"""
+
+from __future__ import annotations
+
+import contextvars
+import json
+import threading
+from typing import Any, Callable, Optional, Protocol, runtime_checkable
+
+
+@runtime_checkable
+class Transport(Protocol):
+    """Structural interface shared by every gateway output sink."""
+
+    def write(self, obj: dict) -> bool:
+        """Send *obj* as one JSON frame; ``False`` means the peer is gone."""
+        ...
+
+    def close(self) -> None:
+        """Free whatever resources the transport holds."""
+        ...
+
+
+# Transport bound for the request handled in the current context; ``None``
+# (the default) means nothing is bound.
+_current_transport: contextvars.ContextVar[Optional[Transport]] = contextvars.ContextVar(
+    "hermes_gateway_transport", default=None
+)
+
+
+def current_transport() -> Optional[Transport]:
+    """Look up the transport bound in the calling context (``None`` if unbound)."""
+    bound = _current_transport.get()
+    return bound
+
+
+def bind_transport(transport: Optional[Transport]):
+    """Make *transport* the current context's transport.
+
+    The returned token must be handed to :func:`reset_transport` to undo it.
+    """
+    token = _current_transport.set(transport)
+    return token
+
+
+def reset_transport(token) -> None:
+    """Undo a :func:`bind_transport` call using the token it returned."""
+    _current_transport.reset(token)
+    return None
+
+
+class StdioTransport:
+    """Transport that prints each frame as one JSON line on a text stream.
+
+    The stream is looked up through ``stream_getter`` on every write rather
+    than captured once, so replacing the underlying stream at runtime (as the
+    tests do with ``monkeypatch.setattr(server, "_real_stdout", ...)``) takes
+    effect immediately.
+    """
+
+    __slots__ = ("_stream_getter", "_lock")
+
+    def __init__(self, stream_getter: Callable[[], Any], lock: threading.Lock) -> None:
+        self._lock = lock
+        self._stream_getter = stream_getter
+
+    def write(self, obj: dict) -> bool:
+        """Serialise *obj*, write it plus a newline, then flush.
+
+        Returns ``False`` if the stream raises ``BrokenPipeError`` and
+        ``True`` otherwise.  Serialisation happens before the lock is taken.
+        """
+        payload = f"{json.dumps(obj, ensure_ascii=False)}\n"
+        try:
+            with self._lock:
+                out = self._stream_getter()
+                out.write(payload)
+                out.flush()
+        except BrokenPipeError:
+            return False
+        return True
+
+    def close(self) -> None:
+        """Nothing to release: the stream is owned by the caller."""
+        return None
+
+
+class TeeTransport:
+    """Fan one write out to a primary transport plus best-effort mirrors.
+
+    Only the primary decides the outcome: its return value is returned and
+    its exceptions propagate.  Anything a mirror raises is swallowed, so a
+    misbehaving sidecar cannot break the main IO path.  The PTY child uses
+    this to send every dispatcher emit to stdio (Ink) and to the back-WS
+    that feeds the dashboard sidebar.
+    """
+
+    __slots__ = ("_primary", "_secondaries")
+
+    def __init__(self, primary: "Transport", *secondaries: "Transport") -> None:
+        self._secondaries = secondaries
+        self._primary = primary
+
+    def write(self, obj: dict) -> bool:
+        """Write to the primary, then to each mirror; return the primary's result."""
+        # The primary goes first so a slow mirror never delays Ink/stdio.
+        result = self._primary.write(obj)
+        for mirror in self._secondaries:
+            try:
+                mirror.write(obj)
+            except Exception:
+                continue
+        return result
+
+    def close(self) -> None:
+        """Close the primary, then every mirror even if the primary raised."""
+        try:
+            self._primary.close()
+        finally:
+            for mirror in self._secondaries:
+                try:
+                    mirror.close()
+                except Exception:
+                    continue
--- a/tui_gateway/ws.py
+++ b/tui_gateway/ws.py
@ -0,0 +1,174 @@
+"""WebSocket transport for the tui_gateway JSON-RPC server.
+
+Reuses :func:`tui_gateway.server.dispatch` verbatim so every RPC method, every
+slash command, every approval/clarify/sudo flow, and every agent event flows
+through the same handlers whether the client is Ink over stdio or an iOS /
+web client over WebSocket.
+
+Wire protocol
+-------------
+Same JSON-RPC messages as stdio, in both directions. The only framing
+difference is that each WebSocket text frame carries exactly one JSON
+document and outgoing frames have no trailing newline. The server emits a
+``gateway.ready`` event immediately after connection accept, then sends
+responses/events for inbound requests.
+
+Mounting
+--------
+    from fastapi import WebSocket
+    from tui_gateway.ws import handle_ws
+
+    @app.websocket("/api/ws")
+    async def ws(ws: WebSocket):
+        await handle_ws(ws)
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import logging
+from typing import Any
+
+from tui_gateway import server
+
+_log = logging.getLogger(__name__)
+
+# Max seconds a pool-dispatched handler will block waiting for the event loop
+# to flush a WS frame before we mark the transport dead. Protects handler
+# threads from a wedged socket.
+_WS_WRITE_TIMEOUT_S = 10.0
+
+# Keep starlette optional at import time; handle_ws uses the real class when
+# it's available and falls back to a generic Exception sentinel otherwise.
+try:
+    from starlette.websockets import WebSocketDisconnect as _WebSocketDisconnect
+except ImportError:  # pragma: no cover - starlette is a required install path
+    _WebSocketDisconnect = Exception  # type: ignore[assignment]
+
+
+class WSTransport:
+    """Per-connection WS transport.
+
+    ``write`` is safe to call from any thread *other than* the event loop
+    thread that owns the socket. Pool workers (the only real caller) run in
+    their own threads, so marshalling onto the loop via
+    :func:`asyncio.run_coroutine_threadsafe` + ``future.result()`` is correct
+    and deadlock-free there.
+
+    When called from the loop thread itself (e.g. by ``handle_ws`` for an
+    inline response) the same call would deadlock: we'd schedule work onto
+    the loop we're currently blocking. We detect that case and fire-and-
+    forget instead. Callers that need to know when the bytes are on the wire
+    should use :meth:`write_async` from the loop thread.
+
+    Once any send fails the transport marks itself closed and every later
+    ``write`` / ``write_async`` returns ``False`` without touching the socket.
+    """
+
+    def __init__(self, ws: Any, loop: asyncio.AbstractEventLoop) -> None:
+        self._ws = ws
+        self._loop = loop
+        self._closed = False
+        # Strong references to in-flight fire-and-forget sends.  The event
+        # loop keeps only weak references to tasks, so an unreferenced task
+        # may be garbage-collected before it finishes.
+        self._pending: set = set()
+
+    def write(self, obj: dict) -> bool:
+        """Send *obj* as one JSON text frame.
+
+        Off the loop thread this blocks until the frame is sent or
+        ``_WS_WRITE_TIMEOUT_S`` elapses, and returns ``False`` on failure.
+        On the loop thread it only schedules the send and returns ``True``.
+        """
+        if self._closed:
+            return False
+
+        line = json.dumps(obj, ensure_ascii=False)
+
+        try:
+            on_loop = asyncio.get_running_loop() is self._loop
+        except RuntimeError:
+            on_loop = False
+
+        if on_loop:
+            # Fire-and-forget — don't block the loop waiting on itself.
+            task = self._loop.create_task(self._safe_send(line))
+            self._pending.add(task)
+            task.add_done_callback(self._pending.discard)
+            return True
+
+        coro = self._safe_send(line)
+        fut = None
+        try:
+            fut = asyncio.run_coroutine_threadsafe(coro, self._loop)
+            fut.result(timeout=_WS_WRITE_TIMEOUT_S)
+        except Exception as exc:
+            if fut is None:
+                # Scheduling itself failed (e.g. loop already closed): the
+                # coroutine will never run, so close it explicitly.
+                coro.close()
+            else:
+                # Timed out or failed: don't leave the send queued on the loop.
+                fut.cancel()
+            self._closed = True
+            _log.debug("ws write failed: %s", exc)
+            return False
+        return not self._closed
+
+    async def write_async(self, obj: dict) -> bool:
+        """Send from the owning event loop. Awaits until the frame is on the wire."""
+        if self._closed:
+            return False
+        await self._safe_send(json.dumps(obj, ensure_ascii=False))
+        return not self._closed
+
+    async def _safe_send(self, line: str) -> None:
+        """Send *line*; on any error mark the transport closed instead of raising."""
+        try:
+            await self._ws.send_text(line)
+        except Exception as exc:
+            self._closed = True
+            _log.debug("ws send failed: %s", exc)
+
+    def close(self) -> None:
+        """Mark the transport closed; the socket itself is closed by the owner."""
+        self._closed = True
+
+
+async def handle_ws(ws: Any) -> None:
+    """Run one WebSocket session. Wire-compatible with ``tui_gateway.entry``.
+
+    Accepts the socket, announces ``gateway.ready``, then reads text frames
+    until the peer disconnects or a write fails.  Each non-empty frame is
+    parsed as JSON: unparseable text is answered with a ``-32700`` error,
+    JSON that is not an object with ``-32600``, and everything else is
+    handed to ``server.dispatch`` on a worker thread.
+
+    On exit the transport is closed, sessions still pointing at it are
+    re-pointed at ``server._stdio_transport``, and the socket is closed.
+    """
+    await ws.accept()
+
+    transport = WSTransport(ws, asyncio.get_running_loop())
+
+    await transport.write_async(
+        {
+            "jsonrpc": "2.0",
+            "method": "event",
+            "params": {
+                "type": "gateway.ready",
+                "payload": {"skin": server.resolve_skin()},
+            },
+        }
+    )
+
+    try:
+        while True:
+            try:
+                raw = await ws.receive_text()
+            except _WebSocketDisconnect:
+                break
+
+            line = raw.strip()
+            if not line:
+                continue
+
+            try:
+                req = json.loads(line)
+            except json.JSONDecodeError:
+                ok = await transport.write_async(
+                    {
+                        "jsonrpc": "2.0",
+                        "error": {"code": -32700, "message": "parse error"},
+                        "id": None,
+                    }
+                )
+                if not ok:
+                    break
+                continue
+
+            if not isinstance(req, dict):
+                # dispatch() calls ``req.get(...)``; a JSON array, number or
+                # string would raise there and tear down the whole session.
+                ok = await transport.write_async(
+                    {
+                        "jsonrpc": "2.0",
+                        "error": {"code": -32600, "message": "invalid request"},
+                        "id": None,
+                    }
+                )
+                if not ok:
+                    break
+                continue
+
+            # dispatch() may schedule long handlers on the pool; it returns
+            # None in that case and the worker writes the response itself via
+            # the transport we pass in (a separate thread, so transport.write
+            # is the safe path there). For inline handlers it returns the
+            # response dict, which we write here from the loop.
+            resp = await asyncio.to_thread(server.dispatch, req, transport)
+            if resp is not None and not await transport.write_async(resp):
+                break
+    finally:
+        transport.close()
+
+        # Detach the transport from any sessions it owned so later emits
+        # fall back to stdio instead of crashing into a closed socket.
+        for _, sess in list(server._sessions.items()):
+            if sess.get("transport") is transport:
+                sess["transport"] = server._stdio_transport
+
+        try:
+            await ws.close()
+        except Exception:
+            pass
--- a/ui-tui/src/app/slash/commands/core.ts
+++ b/ui-tui/src/app/slash/commands/core.ts
@ -246,7 +246,7 @@ export const coreCommands: SlashCommand[] = [
      }

      writeOsc52Clipboard(target.text)
-      sys('sent OSC52 copy sequence (terminal support required)')
+      sys(`copied ${target.text.length} chars`)
    }
  },

--- a/uv.lock
+++ b/uv.lock
@ -9,7 +9,7 @@ resolution-markers = [
 ]

 [options]
-exclude-newer = "2026-04-16T11:49:00.318115Z"
+exclude-newer = "2026-04-17T16:49:45.944715922Z"
 exclude-newer-span = "P7D"

 [[package]]
@ -1870,7 +1870,7 @@ wheels = [

 [[package]]
 name = "hermes-agent"
-version = "0.10.0"
+version = "0.11.0"
 source = { editable = "." }
 dependencies = [
    { name = "anthropic" },
--- a/web/package-lock.json
+++ b/web/package-lock.json
--- a/web/package.json
+++ b/web/package.json
@ -17,6 +17,11 @@
    "@observablehq/plot": "^0.6.17",
    "@react-three/fiber": "^9.6.0",
    "@tailwindcss/vite": "^4.2.1",
+    "@xterm/addon-fit": "^0.11.0",
+    "@xterm/addon-unicode11": "^0.9.0",
+    "@xterm/addon-web-links": "^0.12.0",
+    "@xterm/addon-webgl": "^0.19.0",
+    "@xterm/xterm": "^6.0.0",
    "class-variance-authority": "^0.7.1",
    "clsx": "^2.1.1",
    "gsap": "^3.15.0",
--- a/web/public/fonts-terminal/JetBrainsMono-Bold.woff2
+++ b/web/public/fonts-terminal/JetBrainsMono-Bold.woff2
--- a/web/public/fonts-terminal/JetBrainsMono-Italic.woff2
+++ b/web/public/fonts-terminal/JetBrainsMono-Italic.woff2
--- a/web/public/fonts-terminal/JetBrainsMono-Regular.woff2
+++ b/web/public/fonts-terminal/JetBrainsMono-Regular.woff2
--- a/web/src/App.tsx
+++ b/web/src/App.tsx
@ -58,19 +58,28 @@ import LogsPage from "@/pages/LogsPage";
 import AnalyticsPage from "@/pages/AnalyticsPage";
 import CronPage from "@/pages/CronPage";
 import SkillsPage from "@/pages/SkillsPage";
+import ChatPage from "@/pages/ChatPage";
 import { LanguageSwitcher } from "@/components/LanguageSwitcher";
 import { ThemeSwitcher } from "@/components/ThemeSwitcher";
 import { useI18n } from "@/i18n";
 import { PluginPage, PluginSlot, usePlugins } from "@/plugins";
 import type { PluginManifest } from "@/plugins";
 import { useTheme } from "@/themes";
+import { isDashboardEmbeddedChatEnabled } from "@/lib/dashboard-flags";

 function RootRedirect() {
  return <Navigate to="/sessions" replace />;
 }

-/** Built-in route → page component. Used for routing and for plugin `tab.path` / `tab.override` resolution. */
-const BUILTIN_ROUTES: Record<string, ComponentType> = {
+/** Nav entry for the `/chat` route; only placed in the nav when embedded chat is enabled. */
+const CHAT_NAV_ITEM: NavItem = {
+  path: "/chat",
+  labelKey: "chat",
+  label: "Chat",
+  icon: Terminal,
+};
+
+/** Built-in routes except /chat (only with `hermes dashboard --tui`). */
+const BUILTIN_ROUTES_CORE: Record<string, ComponentType> = {
  "/": RootRedirect,
  "/sessions": SessionsPage,
  "/analytics": AnalyticsPage,
@ -82,7 +91,7 @@ const BUILTIN_ROUTES: Record<string, ComponentType> = {
  "/docs": DocsPage,
 };

-const BUILTIN_NAV: NavItem[] = [
+const BUILTIN_NAV_REST: NavItem[] = [
  {
    path: "/sessions",
    labelKey: "sessions",
@ -167,7 +176,10 @@ function buildNavItems(builtIn: NavItem[], manifests: PluginManifest[]): NavItem
  return items;
 }

-function buildRoutes(manifests: PluginManifest[]): Array<{
+function buildRoutes(
+  builtinRoutes: Record<string, ComponentType>,
+  manifests: PluginManifest[],
+): Array<{
  key: string;
  path: string;
  element: ReactNode;
@ -189,7 +201,7 @@ function buildRoutes(manifests: PluginManifest[]): Array<{
    element: ReactNode;
  }> = [];

-  for (const [path, Component] of Object.entries(BUILTIN_ROUTES)) {
+  for (const [path, Component] of Object.entries(builtinRoutes)) {
    const om = byOverride.get(path);
    if (om) {
      routes.push({
@ -204,7 +216,7 @@ function buildRoutes(manifests: PluginManifest[]): Array<{

  for (const m of addons) {
    if (m.tab.hidden) continue;
-    if (BUILTIN_ROUTES[m.tab.path]) continue;
+    if (builtinRoutes[m.tab.path]) continue;
    routes.push({
      key: `plugin:${m.name}`,
      path: m.tab.path,
@ -214,7 +226,7 @@ function buildRoutes(manifests: PluginManifest[]): Array<{

  for (const m of manifests) {
    if (!m.tab.hidden) continue;
-    if (BUILTIN_ROUTES[m.tab.path] || m.tab.override) continue;
+    if (builtinRoutes[m.tab.path] || m.tab.override) continue;
    routes.push({
      key: `plugin:hidden:${m.name}`,
      path: m.tab.path,
@ -233,12 +245,32 @@ export default function App() {
  const [mobileOpen, setMobileOpen] = useState(false);
  const closeMobile = useCallback(() => setMobileOpen(false), []);
  const isDocsRoute = pathname === "/docs" || pathname === "/docs/";
+  const normalizedPath = pathname.replace(/\/$/, "") || "/";
+  const isChatRoute = normalizedPath === "/chat";
+  const embeddedChat = isDashboardEmbeddedChatEnabled();
+
+  const builtinRoutes = useMemo(
+    () => ({
+      ...BUILTIN_ROUTES_CORE,
+      ...(embeddedChat ? { "/chat": ChatPage } : {}),
+    }),
+    [embeddedChat],
+  );
+
+  const builtinNav = useMemo(
+    () =>
+      embeddedChat ? [CHAT_NAV_ITEM, ...BUILTIN_NAV_REST] : BUILTIN_NAV_REST,
+    [embeddedChat],
+  );

  const navItems = useMemo(
-    () => buildNavItems(BUILTIN_NAV, manifests),
-    [manifests],
+    () => buildNavItems(builtinNav, manifests),
+    [builtinNav, manifests],
+  );
+  const routes = useMemo(
+    () => buildRoutes(builtinRoutes, manifests),
+    [builtinRoutes, manifests],
  );
-  const routes = useMemo(() => buildRoutes(manifests), [manifests]);
  const pluginTabMeta = useMemo(
    () =>
      manifests
@ -465,8 +497,9 @@ export default function App() {
              className={cn(
                "relative z-2 flex min-w-0 min-h-0 flex-1 flex-col",
                "px-3 sm:px-6",
-                "pt-2 sm:pt-4 lg:pt-6",
-                "pb-4 sm:pb-8",
+                isChatRoute
+                  ? "pb-3 pt-1 sm:pb-4 sm:pt-2 lg:pt-4"
+                  : "pt-2 sm:pt-4 lg:pt-6 pb-4 sm:pb-8",
                isDocsRoute && "min-h-0 flex-1",
              )}
            >
@ -474,7 +507,7 @@ export default function App() {
              <div
                className={cn(
                  "w-full min-w-0",
-                  isDocsRoute && "min-h-0 flex flex-1 flex-col",
+                  (isDocsRoute || isChatRoute) && "min-h-0 flex flex-1 flex-col",
                )}
              >
                <Routes>
--- a/web/src/components/ChatSidebar.tsx
+++ b/web/src/components/ChatSidebar.tsx
@ -0,0 +1,379 @@
+/**
+ * ChatSidebar — structured-events panel that sits next to the xterm.js
+ * terminal in the dashboard Chat tab.
+ *
+ * Two WebSockets, one per concern:
+ *
+ *   1. **JSON-RPC sidecar** (`GatewayClient` → /api/ws) — drives the
+ *      sidebar's own slot of the dashboard's in-process gateway.  Owns
+ *      the model badge / picker / connection state / error banner.
+ *      Independent of the PTY pane's session by design — those are the
+ *      pieces the sidebar needs to be able to drive directly (model
+ *      switch via slash.exec, etc.).
+ *
+ *   2. **Event subscriber** (/api/events?channel=…) — passive, receives
+ *      every dispatcher emit from the PTY-side `tui_gateway.entry` that
+ *      the dashboard fanned out.  This is how `tool.start/progress/
+ *      complete` from the agent loop reach the sidebar even though the
+ *      PTY child runs three processes deep from us.  The `channel` id
+ *      ties this listener to the same chat tab's PTY child — see
+ *      `ChatPage.tsx` for where the id is generated.
+ *
+ * Best-effort throughout: WS failures show in the badge / banner, the
+ * terminal pane keeps working unimpaired.
+ */
+
+import { Badge } from "@/components/ui/badge";
+import { Button } from "@/components/ui/button";
+import { Card } from "@/components/ui/card";
+
+import { ModelPickerDialog } from "@/components/ModelPickerDialog";
+import { ToolCall, type ToolEntry } from "@/components/ToolCall";
+import { GatewayClient, type ConnectionState } from "@/lib/gatewayClient";
+
+import { cn } from "@/lib/utils";
+import { AlertCircle, ChevronDown, RefreshCw } from "lucide-react";
+import { useCallback, useEffect, useMemo, useState } from "react";
+
+/**
+ * Fields accumulated from `session.info` event payloads. Each payload is
+ * shallow-merged over the previous value, so every field is optional.
+ */
+interface SessionInfo {
+  cwd?: string;
+  // Full model id; only the segment after the last "/" is shown in the badge.
+  model?: string;
+  provider?: string;
+  // Shown in the banner when no error is set.
+  credential_warning?: string;
+}
+
+/**
+ * Shape of a frame parsed from the /api/events WebSocket. Only frames with
+ * `method === "event"` and a `params` object are processed; `payload` is
+ * narrowed per event `type` at the use site.
+ */
+interface RpcEnvelope {
+  method?: string;
+  params?: { type?: string; payload?: unknown };
+}
+
+/** Maximum number of tool entries kept in the sidebar; older ones are dropped first. */
+const TOOL_LIMIT = 20;
+
+/** Badge text shown for each gateway connection state. */
+const STATE_LABEL: Record<ConnectionState, string> = {
+  idle: "idle",
+  connecting: "connecting",
+  open: "live",
+  closed: "closed",
+  error: "error",
+};
+
+/** Class names applied to the connection badge for each gateway connection state. */
+const STATE_TONE: Record<ConnectionState, string> = {
+  idle: "bg-muted text-muted-foreground",
+  connecting: "bg-primary/10 text-primary",
+  open: "bg-emerald-500/10 text-emerald-500 dark:text-emerald-400",
+  closed: "bg-muted text-muted-foreground",
+  error: "bg-destructive/10 text-destructive",
+};
+
+/** Props for the ChatSidebar component. */
+interface ChatSidebarProps {
+  // Sent as the `channel` query param on the /api/events subscription.
+  channel: string;
+  // Extra classes merged onto the root <aside>.
+  className?: string;
+}
+
+/**
+ * Sidebar shown beside the embedded terminal: model badge + picker,
+ * connection-state badge, error banner, and the most recent tool calls.
+ *
+ * @param channel   Sent as the `channel` query param when subscribing to
+ *                  /api/events; the subscription is skipped while it is empty.
+ * @param className Extra classes merged onto the root `<aside>`.
+ */
+export function ChatSidebar({ channel, className }: ChatSidebarProps) {
+  // `version` bumps on reconnect; gw is derived so we never call setState
+  // for it inside an effect (React 19's set-state-in-effect rule). The
+  // counter is the dependency on purpose — it's not read in the memo body,
+  // it's the signal that says "rebuild the client".
+  const [version, setVersion] = useState(0);
+  // eslint-disable-next-line react-hooks/exhaustive-deps
+  const gw = useMemo(() => new GatewayClient(), [version]);
+
+  const [state, setState] = useState<ConnectionState>("idle");
+  const [sessionId, setSessionId] = useState<string | null>(null);
+  const [info, setInfo] = useState<SessionInfo>({});
+  const [tools, setTools] = useState<ToolEntry[]>([]);
+  const [modelOpen, setModelOpen] = useState(false);
+  const [error, setError] = useState<string | null>(null);
+
+  useEffect(() => {
+    let cancelled = false;
+    const offState = gw.onState(setState);
+
+    const offSessionInfo = gw.on<SessionInfo>("session.info", (ev) => {
+      if (ev.session_id) {
+        setSessionId(ev.session_id);
+      }
+
+      if (ev.payload) {
+        setInfo((prev) => ({ ...prev, ...ev.payload }));
+      }
+    });
+
+    const offError = gw.on<{ message?: string }>("error", (ev) => {
+      const message = ev.payload?.message;
+
+      if (message) {
+        setError(message);
+      }
+    });
+
+    // Adopt whichever session the gateway hands us. session.create on the
+    // sidecar is independent of the PTY pane's session by design — we
+    // only need a sid to drive the model picker's slash.exec calls.
+    gw.connect()
+      .then(() => {
+        if (cancelled) {
+          return;
+        }
+        return gw.request<{ session_id: string }>("session.create", {});
+      })
+      .then((created) => {
+        if (cancelled || !created?.session_id) {
+          return;
+        }
+        setSessionId(created.session_id);
+      })
+      .catch((e: Error) => {
+        if (!cancelled) {
+          setError(e.message);
+        }
+      });
+
+    return () => {
+      cancelled = true;
+      offState();
+      offSessionInfo();
+      offError();
+      gw.close();
+    };
+  }, [gw]);
+
+  // Event subscriber WebSocket — receives the rebroadcast of every
+  // dispatcher emit from the PTY child's gateway.  See /api/pub +
+  // /api/events in hermes_cli/web_server.py for the broadcast hop.
+  //
+  // Failures (auth/loopback rejection, server too old to expose the
+  // endpoint, transient drops) surface in the same banner as the
+  // JSON-RPC sidecar so the sidebar matches its documented best-effort
+  // UX and the user always has a reconnect affordance.
+  useEffect(() => {
+    const token = window.__HERMES_SESSION_TOKEN__;
+
+    if (!token || !channel) {
+      return;
+    }
+
+    const proto = window.location.protocol === "https:" ? "wss:" : "ws:";
+    const qs = new URLSearchParams({ token, channel });
+    const ws = new WebSocket(
+      `${proto}//${window.location.host}/api/events?${qs.toString()}`,
+    );
+
+    // `unmounting` suppresses the banner during cleanup — `ws.close()`
+    // from the effect's return fires a close event with code 1005 that
+    // would otherwise look like an unexpected drop.
+    const DISCONNECTED = "events feed disconnected — tool calls may not appear";
+    let unmounting = false;
+    const surface = (msg: string) => !unmounting && setError(msg);
+
+    ws.addEventListener("error", () => surface(DISCONNECTED));
+
+    ws.addEventListener("close", (ev) => {
+      if (ev.code === 4401 || ev.code === 4403) {
+        surface(`events feed rejected (${ev.code}) — reload the page`);
+      } else if (ev.code !== 1000) {
+        surface(DISCONNECTED);
+      }
+    });
+
+    ws.addEventListener("message", (ev) => {
+      let frame: RpcEnvelope;
+
+      try {
+        frame = JSON.parse(ev.data);
+      } catch {
+        // Non-JSON frames are ignored rather than surfaced.
+        return;
+      }
+
+      if (frame.method !== "event" || !frame.params) {
+        return;
+      }
+
+      const { type, payload } = frame.params;
+
+      if (type === "tool.start") {
+        const p = payload as
+          | { tool_id?: string; name?: string; context?: string }
+          | undefined;
+        const toolId = p?.tool_id;
+
+        if (!toolId) {
+          return;
+        }
+
+        // Append the new entry, then keep only the newest TOOL_LIMIT.
+        setTools((prev) =>
+          [
+            ...prev,
+            {
+              kind: "tool" as const,
+              id: `tool-${toolId}-${prev.length}`,
+              tool_id: toolId,
+              name: p?.name ?? "tool",
+              context: p?.context,
+              status: "running" as const,
+              startedAt: Date.now(),
+            },
+          ].slice(-TOOL_LIMIT),
+        );
+      } else if (type === "tool.progress") {
+        const p = payload as
+          | { name?: string; preview?: string }
+          | undefined;
+
+        if (!p?.name || !p.preview) {
+          return;
+        }
+
+        // Progress is matched by tool name (no tool_id is read from this
+        // payload), so every running entry with that name gets the preview.
+        setTools((prev) =>
+          prev.map((t) =>
+            t.status === "running" && t.name === p.name
+              ? { ...t, preview: p.preview }
+              : t,
+          ),
+        );
+      } else if (type === "tool.complete") {
+        const p = payload as
+          | {
+              tool_id?: string;
+              summary?: string;
+              error?: string;
+              inline_diff?: string;
+            }
+          | undefined;
+
+        if (!p?.tool_id) {
+          return;
+        }
+
+        setTools((prev) =>
+          prev.map((t) =>
+            t.tool_id === p.tool_id
+              ? {
+                  ...t,
+                  status: p.error ? "error" : "done",
+                  summary: p.summary,
+                  error: p.error,
+                  inline_diff: p.inline_diff,
+                  completedAt: Date.now(),
+                }
+              : t,
+          ),
+        );
+      }
+    });
+
+    return () => {
+      unmounting = true;
+      ws.close();
+    };
+  }, [channel, version]);
+
+  // Clears the banner and tool list, then bumps `version` so both the
+  // gateway client and the events socket are rebuilt.
+  const reconnect = useCallback(() => {
+    setError(null);
+    setTools([]);
+    // Drop the previous client's session id so the picker stays disabled
+    // until the rebuilt client's session.create resolves.
+    setSessionId(null);
+    setVersion((v) => v + 1);
+  }, []);
+
+  // Picker hands us a fully-formed slash command (e.g. "/model anthropic/...").
+  // Sent through `slash.exec` without awaiting the result; the TUI pane will
+  // render the result via PTY, so the sidebar doesn't surface output of its
+  // own. A rejected request is shown in the banner instead of being left as
+  // an unhandled promise rejection.
+  const onModelSubmit = useCallback(
+    (slashCommand: string) => {
+      if (!sessionId) {
+        return;
+      }
+
+      gw.request("slash.exec", {
+        session_id: sessionId,
+        command: slashCommand,
+      }).catch((e: unknown) => {
+        setError(e instanceof Error ? e.message : String(e));
+      });
+      setModelOpen(false);
+    },
+    [gw, sessionId],
+  );
+
+  const canPickModel = state === "open" && !!sessionId;
+  const modelLabel = (info.model ?? "—").split("/").slice(-1)[0] ?? "—";
+  // An explicit error takes precedence over the credential warning.
+  const banner = error ?? info.credential_warning ?? null;
+
+  return (
+    <aside
+      className={cn(
+        "flex h-full w-full min-w-0 shrink-0 flex-col gap-3 normal-case lg:w-80",
+        className,
+      )}
+    >
+      <Card className="flex items-center justify-between gap-2 px-3 py-2">
+        <div className="min-w-0">
+          <div className="text-xs uppercase tracking-wider text-muted-foreground">
+            model
+          </div>
+
+          <button
+            type="button"
+            disabled={!canPickModel}
+            onClick={() => setModelOpen(true)}
+            className="flex items-center gap-1 truncate text-sm font-medium hover:underline disabled:cursor-not-allowed disabled:opacity-60 disabled:no-underline"
+            title={info.model ?? "switch model"}
+          >
+            <span className="truncate">{modelLabel}</span>
+
+            {canPickModel && (
+              <ChevronDown className="h-3 w-3 shrink-0 opacity-60" />
+            )}
+          </button>
+        </div>
+
+        <Badge className={STATE_TONE[state]}>{STATE_LABEL[state]}</Badge>
+      </Card>
+
+      {banner && (
+        <Card className="flex items-start gap-2 border-destructive/40 bg-destructive/5 px-3 py-2 text-xs">
+          <AlertCircle className="mt-0.5 h-3.5 w-3.5 shrink-0 text-destructive" />
+
+          <div className="min-w-0 flex-1">
+            <div className="wrap-break-word text-destructive">{banner}</div>
+
+            {/* Reconnect is offered only for errors, not for the credential warning. */}
+            {error && (
+              <Button
+                variant="ghost"
+                size="sm"
+                className="mt-1 h-6 px-1.5 text-xs"
+                onClick={reconnect}
+              >
+                <RefreshCw className="mr-1 h-3 w-3" />
+                reconnect
+              </Button>
+            )}
+          </div>
+        </Card>
+      )}
+
+      <Card className="flex min-h-0 flex-1 flex-col px-2 py-2">
+        <div className="px-1 pb-2 text-xs uppercase tracking-wider text-muted-foreground">
+          tools
+        </div>
+
+        <div className="flex min-h-0 flex-1 flex-col gap-1.5 overflow-y-auto pr-1">
+          {tools.length === 0 ? (
+            <div className="px-2 py-4 text-center text-xs text-muted-foreground">
+              no tool calls yet
+            </div>
+          ) : (
+            tools.map((t) => <ToolCall key={t.id} tool={t} />)
+          )}
+        </div>
+      </Card>
+
+      {modelOpen && canPickModel && sessionId && (
+        <ModelPickerDialog
+          gw={gw}
+          sessionId={sessionId}
+          onClose={() => setModelOpen(false)}
+          onSubmit={onModelSubmit}
+        />
+      )}
+    </aside>
+  );
+}
--- a/web/src/components/Markdown.tsx
+++ b/web/src/components/Markdown.tsx
@ -1,22 +1,50 @@
-import { useMemo } from "react";
+import { useMemo, type ReactNode } from "react";

 /**
 * Lightweight markdown renderer for LLM output.
 * Handles: code blocks, inline code, bold, italic, headers, links, lists, horizontal rules.
 * NOT a full CommonMark parser — optimized for typical assistant message patterns.
+ *
+ * `streaming` renders a blinking caret at the tail of the last block so it
+ * appears to hug the final character instead of wrapping onto a new line
+ * after a block element (paragraph/list/code/…).
 */
-export function Markdown({ content, highlightTerms }: { content: string; highlightTerms?: string[] }) {
+export function Markdown({
+  content,
+  highlightTerms,
+  streaming,
+}: {
+  content: string;
+  highlightTerms?: string[];
+  streaming?: boolean;
+}) {
  const blocks = useMemo(() => parseBlocks(content), [content]);
+  const caret = streaming ? <StreamingCaret /> : null;

  return (
    <div className="text-sm text-foreground leading-relaxed space-y-2">
      {blocks.map((block, i) => (
-        <Block key={i} block={block} highlightTerms={highlightTerms} />
+        <Block
+          key={i}
+          block={block}
+          highlightTerms={highlightTerms}
+          caret={caret && i === blocks.length - 1 ? caret : null}
+        />
      ))}
+      {blocks.length === 0 && caret}
    </div>
  );
 }

+/** Caret element appended after streamed content; marked `aria-hidden` so assistive tech skips it. */
+function StreamingCaret() {
+  const caretClasses =
+    "inline-block w-[0.5em] h-[1em] ml-0.5 align-[-0.15em] bg-foreground/50 animate-pulse";
+
+  return <span aria-hidden className={caretClasses} />;
+}
+
 /* ------------------------------------------------------------------ */
 /*  Types                                                              */
 /* ------------------------------------------------------------------ */
@ -58,7 +86,11 @@ function parseBlocks(text: string): BlockNode[] {
    // Heading
    const headingMatch = line.match(/^(#{1,4})\s+(.+)/);
    if (headingMatch) {
-      blocks.push({ type: "heading", level: headingMatch[1].length, content: headingMatch[2] });
+      blocks.push({
+        type: "heading",
+        level: headingMatch[1].length,
+        content: headingMatch[2],
+      });
      i++;
      continue;
    }
@ -124,12 +156,23 @@ function parseBlocks(text: string): BlockNode[] {
 /*  Block renderer                                                     */
 /* ------------------------------------------------------------------ */

-function Block({ block, highlightTerms }: { block: BlockNode; highlightTerms?: string[] }) {
+function Block({
+  block,
+  highlightTerms,
+  caret,
+}: {
+  block: BlockNode;
+  highlightTerms?: string[];
+  caret?: ReactNode;
+}) {
  switch (block.type) {
    case "code":
      return (
        <pre className="bg-secondary/60 border border-border px-3 py-2.5 text-xs font-mono leading-relaxed overflow-x-auto">
-          <code>{block.content}</code>
+          <code>
+            {block.content}
+            {caret}
+          </code>
        </pre>
      );

@ -141,25 +184,46 @@ function Block({ block, highlightTerms }: { block: BlockNode; highlightTerms?: s
        h3: "text-sm font-semibold",
        h4: "text-sm font-medium",
      };
-      return <Tag className={sizes[Tag]}><InlineContent text={block.content} highlightTerms={highlightTerms} /></Tag>;
+      return (
+        <Tag className={sizes[Tag]}>
+          <InlineContent text={block.content} highlightTerms={highlightTerms} />
+          {caret}
+        </Tag>
+      );
    }

    case "hr":
-      return <hr className="border-border" />;
+      return (
+        <>
+          <hr className="border-border" />
+          {caret}
+        </>
+      );

    case "list": {
      const Tag = block.ordered ? "ol" : "ul";
+      const last = block.items.length - 1;
      return (
-        <Tag className={`space-y-0.5 ${block.ordered ? "list-decimal" : "list-disc"} pl-5 text-sm`}>
+        <Tag
+          className={`space-y-0.5 ${block.ordered ? "list-decimal" : "list-disc"} pl-5 text-sm`}
+        >
          {block.items.map((item, i) => (
-            <li key={i}><InlineContent text={item} highlightTerms={highlightTerms} /></li>
+            <li key={i}>
+              <InlineContent text={item} highlightTerms={highlightTerms} />
+              {i === last ? caret : null}
+            </li>
          ))}
        </Tag>
      );
    }

    case "paragraph":
-      return <p><InlineContent text={block.content} highlightTerms={highlightTerms} /></p>;
+      return (
+        <p>
+          <InlineContent text={block.content} highlightTerms={highlightTerms} />
+          {caret}
+        </p>
+      );
  }
 }

@ -178,7 +242,8 @@ type InlineNode =
 function parseInline(text: string): InlineNode[] {
  const nodes: InlineNode[] = [];
  // Pattern priority: code > link > bold > italic > bare URL > line break
-  const pattern = /(`[^`]+`)|(\[([^\]]+)\]\(([^)]+)\))|(\*\*([^*]+)\*\*)|(\*([^*]+)\*)|(\bhttps?:\/\/[^\s<>)\]]+)|(\n)/g;
+  const pattern =
+    /(`[^`]+`)|(\[([^\]]+)\]\(([^)]+)\))|(\*\*([^*]+)\*\*)|(\*([^*]+)\*)|(\bhttps?:\/\/[^\s<>)\]]+)|(\n)/g;
  let lastIndex = 0;
  let match: RegExpExecArray | null;

@ -217,7 +282,13 @@ function parseInline(text: string): InlineNode[] {
  return nodes;
 }

-function InlineContent({ text, highlightTerms }: { text: string; highlightTerms?: string[] }) {
+function InlineContent({
+  text,
+  highlightTerms,
+}: {
+  text: string;
+  highlightTerms?: string[];
+}) {
  const nodes = useMemo(() => parseInline(text), [text]);

  return (
@ -225,17 +296,34 @@ function InlineContent({ text, highlightTerms }: { text: string; highlightTerms?
      {nodes.map((node, i) => {
        switch (node.type) {
          case "text":
-            return <HighlightedText key={i} text={node.content} terms={highlightTerms} />;
+            return (
+              <HighlightedText
+                key={i}
+                text={node.content}
+                terms={highlightTerms}
+              />
+            );
          case "code":
            return (
-              <code key={i} className="bg-secondary/60 px-1.5 py-0.5 text-xs font-mono text-primary/90">
+              <code
+                key={i}
+                className="bg-secondary/60 px-1.5 py-0.5 text-xs font-mono text-primary/90"
+              >
                {node.content}
              </code>
            );
          case "bold":
-            return <strong key={i} className="font-semibold"><HighlightedText text={node.content} terms={highlightTerms} /></strong>;
+            return (
+              <strong key={i} className="font-semibold">
+                <HighlightedText text={node.content} terms={highlightTerms} />
+              </strong>
+            );
          case "italic":
-            return <em key={i}><HighlightedText text={node.content} terms={highlightTerms} /></em>;
+            return (
+              <em key={i}>
+                <HighlightedText text={node.content} terms={highlightTerms} />
+              </em>
+            );
          case "link":
            return (
              <a
@ -269,10 +357,12 @@ function HighlightedText({ text, terms }: { text: string; terms?: string[] }) {
    <>
      {parts.map((part, i) =>
        regex.test(part) ? (
-          <mark key={i} className="bg-warning/30 text-warning px-0.5">{part}</mark>
+          <mark key={i} className="bg-warning/30 text-warning px-0.5">
+            {part}
+          </mark>
        ) : (
          <span key={i}>{part}</span>
-        )
+        ),
      )}
    </>
  );
--- a/web/src/components/ModelPickerDialog.tsx
+++ b/web/src/components/ModelPickerDialog.tsx
@ -0,0 +1,392 @@
+import { Button } from "@/components/ui/button";
+import { Input } from "@/components/ui/input";
+import type { GatewayClient } from "@/lib/gatewayClient";
+import { Check, Loader2, Search, X } from "lucide-react";
+import { useEffect, useMemo, useRef, useState } from "react";
+
+/**
+ * Two-stage model picker modal.
+ *
+ * Mirrors ui-tui/src/components/modelPicker.tsx:
+ *   Stage 1: pick provider (authenticated providers only)
+ *   Stage 2: pick model within that provider
+ *
+ * On confirm, emits `/model <model> --provider <slug> [--global]` through
+ * the `onSubmit` callback so the parent (ChatSidebar, via `slash.exec`) can
+ * dispatch it through the existing slash pipeline. That keeps persistence +
+ * actual switch logic in one place.
+ */
+
+/** One entry of `providers` in the `model.options` response. */
+interface ModelOptionProvider {
+  // Display name; matched against the filter query.
+  name: string;
+  // Identifier emitted as `--provider <slug>` in the resulting slash command.
+  slug: string;
+  // Model ids offered for this provider; treated as empty when absent.
+  models?: string[];
+  total_models?: number;
+  // The provider flagged here is preselected when the dialog loads.
+  is_current?: boolean;
+  warning?: string;
+}
+
+/** Result of the `model.options` request; every field may be missing. */
+interface ModelOptionsResponse {
+  // Shown in the header as the current model.
+  model?: string;
+  // Shown in the header next to the current model when non-empty.
+  provider?: string;
+  providers?: ModelOptionProvider[];
+}
+
+/** Props for ModelPickerDialog. */
+interface Props {
+  /** Client used to issue the `model.options` request. */
+  gw: GatewayClient;
+  /** Forwarded as `session_id` on `model.options` when non-empty. */
+  sessionId: string;
+  /** Invoked on Esc, backdrop click, the close/cancel buttons, and after a confirm. */
+  onClose(): void;
+  /** Parent runs the resulting slash command through slashExec. */
+  onSubmit(slashCommand: string): void;
+}
+
+/**
+ * Modal that loads providers/models via `model.options`, lets the user
+ * filter and pick one, and hands the resulting `/model …` slash command to
+ * `onSubmit` before calling `onClose`.
+ */
+export function ModelPickerDialog({ gw, sessionId, onClose, onSubmit }: Props) {
+  const [providers, setProviders] = useState<ModelOptionProvider[]>([]);
+  const [currentModel, setCurrentModel] = useState("");
+  const [currentProviderSlug, setCurrentProviderSlug] = useState("");
+  const [loading, setLoading] = useState(true);
+  const [error, setError] = useState<string | null>(null);
+  const [selectedSlug, setSelectedSlug] = useState("");
+  const [selectedModel, setSelectedModel] = useState("");
+  const [query, setQuery] = useState("");
+  const [persistGlobal, setPersistGlobal] = useState(false);
+  // Set by the effect cleanup so a late response doesn't touch state.
+  const closedRef = useRef(false);
+
+  // Load providers + models on open.
+  useEffect(() => {
+    closedRef.current = false;
+
+    gw.request<ModelOptionsResponse>(
+      "model.options",
+      sessionId ? { session_id: sessionId } : {},
+    )
+      .then((r) => {
+        if (closedRef.current) return;
+        const next = r?.providers ?? [];
+        setProviders(next);
+        setCurrentModel(String(r?.model ?? ""));
+        setCurrentProviderSlug(String(r?.provider ?? ""));
+        // Preselect the current provider, falling back to the first one.
+        setSelectedSlug(
+          (next.find((p) => p.is_current) ?? next[0])?.slug ?? "",
+        );
+        setSelectedModel("");
+        setLoading(false);
+      })
+      .catch((e) => {
+        if (closedRef.current) return;
+        setError(e instanceof Error ? e.message : String(e));
+        setLoading(false);
+      });
+
+    return () => {
+      closedRef.current = true;
+    };
+  }, [gw, sessionId]);
+
+  // Esc closes.
+  useEffect(() => {
+    const onKey = (e: KeyboardEvent) => {
+      if (e.key === "Escape") {
+        e.preventDefault();
+        onClose();
+      }
+    };
+    window.addEventListener("keydown", onKey);
+    return () => window.removeEventListener("keydown", onKey);
+  }, [onClose]);
+
+  const selectedProvider = useMemo(
+    () => providers.find((p) => p.slug === selectedSlug) ?? null,
+    [providers, selectedSlug],
+  );
+
+  const models = useMemo(
+    () => selectedProvider?.models ?? [],
+    [selectedProvider],
+  );
+
+  const needle = query.trim().toLowerCase();
+
+  // A provider matches on its name, its slug, or any of its model ids.
+  const filteredProviders = useMemo(
+    () =>
+      !needle
+        ? providers
+        : providers.filter(
+            (p) =>
+              p.name.toLowerCase().includes(needle) ||
+              p.slug.toLowerCase().includes(needle) ||
+              (p.models ?? []).some((m) => m.toLowerCase().includes(needle)),
+          ),
+    [providers, needle],
+  );
+
+  const filteredModels = useMemo(
+    () =>
+      !needle ? models : models.filter((m) => m.toLowerCase().includes(needle)),
+    [models, needle],
+  );
+
+  const canConfirm = !!selectedProvider && !!selectedModel;
+
+  // `model` is passed explicitly by callers that have just picked a model:
+  // state updates are not visible to this render's closure, so reading
+  // `selectedModel` right after `setSelectedModel(m)` would still yield the
+  // previous value (even from a deferred timeout).
+  const confirm = (model: string = selectedModel) => {
+    if (!selectedProvider || !model) return;
+    const global = persistGlobal ? " --global" : "";
+    onSubmit(`/model ${model} --provider ${selectedProvider.slug}${global}`);
+    onClose();
+  };
+
+  return (
+    <div
+      className="fixed inset-0 z-[100] flex items-center justify-center bg-background/85 backdrop-blur-sm p-4"
+      onClick={(e) => e.target === e.currentTarget && onClose()}
+      role="dialog"
+      aria-modal="true"
+      aria-labelledby="model-picker-title"
+    >
+      <div className="relative w-full max-w-3xl max-h-[80vh] border border-border bg-card shadow-2xl flex flex-col">
+        <button
+          type="button"
+          onClick={onClose}
+          className="absolute right-3 top-3 text-muted-foreground hover:text-foreground transition-colors cursor-pointer"
+          aria-label="Close"
+        >
+          <X className="h-5 w-5" />
+        </button>
+
+        <header className="p-5 pb-3 border-b border-border">
+          <h2
+            id="model-picker-title"
+            className="font-display text-base tracking-wider uppercase"
+          >
+            Switch Model
+          </h2>
+          <p className="text-xs text-muted-foreground mt-1 font-mono">
+            current: {currentModel || "(unknown)"}
+            {currentProviderSlug && ` · ${currentProviderSlug}`}
+          </p>
+        </header>
+
+        <div className="px-5 pt-3 pb-2 border-b border-border">
+          <div className="relative">
+            <Search className="absolute left-2 top-1/2 -translate-y-1/2 h-3.5 w-3.5 text-muted-foreground" />
+            <Input
+              autoFocus
+              placeholder="Filter providers and models…"
+              value={query}
+              onChange={(e) => setQuery(e.target.value)}
+              className="pl-7 h-8 text-sm"
+            />
+          </div>
+        </div>
+
+        <div className="flex-1 min-h-0 grid grid-cols-[200px_1fr] overflow-hidden">
+          <ProviderColumn
+            loading={loading}
+            error={error}
+            providers={filteredProviders}
+            total={providers.length}
+            selectedSlug={selectedSlug}
+            query={needle}
+            onSelect={(slug) => {
+              // Switching provider clears the model picked under the old one.
+              setSelectedSlug(slug);
+              setSelectedModel("");
+            }}
+          />
+
+          <ModelColumn
+            provider={selectedProvider}
+            models={filteredModels}
+            allModels={models}
+            selectedModel={selectedModel}
+            currentModel={currentModel}
+            currentProviderSlug={currentProviderSlug}
+            onSelect={setSelectedModel}
+            onConfirm={(m) => {
+              setSelectedModel(m);
+              // Submit the model that was just chosen, not the stale state.
+              confirm(m);
+            }}
+          />
+        </div>
+
+        <footer className="border-t border-border p-3 flex items-center justify-between gap-3 flex-wrap">
+          <label className="flex items-center gap-2 text-xs text-muted-foreground cursor-pointer select-none">
+            <input
+              type="checkbox"
+              checked={persistGlobal}
+              onChange={(e) => setPersistGlobal(e.target.checked)}
+              className="cursor-pointer"
+            />
+            Persist globally (otherwise this session only)
+          </label>
+
+          <div className="flex items-center gap-2 ml-auto">
+            <Button variant="ghost" size="sm" onClick={onClose}>
+              Cancel
+            </Button>
+            {/* Wrapped so the click event isn't passed as the model. */}
+            <Button size="sm" onClick={() => confirm()} disabled={!canConfirm}>
+              Switch
+            </Button>
+          </div>
+        </footer>
+      </div>
+    </div>
+  );
+}
+
+/* ------------------------------------------------------------------ */
+/*  Provider column                                                    */
+/* ------------------------------------------------------------------ */
+
+/**
+ * Left-hand column of the model picker: renders the provider list it is given
+ * as selectable rows.
+ *
+ * Renders, top to bottom: a spinner while `loading`, the `error` text when
+ * set, an empty-state hint when neither applies and `providers` is empty, and
+ * then one button per provider. `total` is only used to choose the wording of
+ * the empty state: "no authenticated providers" when there is no `query` and
+ * `total` is 0, "no matches" in every other case.
+ */
+function ProviderColumn({
+  loading,
+  error,
+  providers,
+  total,
+  selectedSlug,
+  query,
+  onSelect,
+}: {
+  loading: boolean;
+  error: string | null;
+  providers: ModelOptionProvider[];
+  total: number;
+  selectedSlug: string;
+  query: string;
+  onSelect(slug: string): void;
+}) {
+  return (
+    <div className="border-r border-border overflow-y-auto">
+      {loading && (
+        <div className="flex items-center gap-2 p-4 text-xs text-muted-foreground">
+          <Loader2 className="h-3 w-3 animate-spin" /> loading…
+        </div>
+      )}
+
+      {error && <div className="p-4 text-xs text-destructive">{error}</div>}
+
+      {!loading && !error && providers.length === 0 && (
+        <div className="p-4 text-xs text-muted-foreground italic">
+          {query
+            ? "no matches"
+            : total === 0
+              ? "no authenticated providers"
+              : "no matches"}
+        </div>
+      )}
+
+      {providers.map((p) => {
+        // The row matching `selectedSlug` gets the highlighted left border.
+        const active = p.slug === selectedSlug;
+        return (
+          <button
+            key={p.slug}
+            type="button"
+            onClick={() => onSelect(p.slug)}
+            className={`w-full text-left px-3 py-2 text-xs border-l-2 transition-colors cursor-pointer flex items-start gap-2 ${
+              active
+                ? "bg-primary/10 border-l-primary text-foreground"
+                : "border-l-transparent text-muted-foreground hover:text-foreground hover:bg-muted/40"
+            }`}
+          >
+            <div className="flex-1 min-w-0">
+              <div className="flex items-center gap-1.5">
+                <span className="font-medium truncate">{p.name}</span>
+                {p.is_current && <CurrentTag />}
+              </div>
+              <div className="text-[0.65rem] text-muted-foreground/80 font-mono truncate">
+                {p.slug} · {p.total_models ?? p.models?.length ?? 0} models
+              </div>
+            </div>
+          </button>
+        );
+      })}
+    </div>
+  );
+}
+
+/* ------------------------------------------------------------------ */
+/*  Model column                                                       */
+/* ------------------------------------------------------------------ */
+
+/**
+ * Right-hand column of the model picker: the model list for the selected
+ * provider.
+ *
+ * - With no `provider`, only a "pick a provider" placeholder is rendered.
+ * - `provider.warning`, when present, is shown above the list.
+ * - `models` is the list to render; `allModels` is only consulted when
+ *   `models` is empty, to tell "filter matched nothing" apart from "provider
+ *   lists no models".
+ * - A single click calls `onSelect`, a double click calls `onConfirm`.
+ * - A row is tagged "current" only when both the model name and the provider
+ *   slug match `currentModel` / `currentProviderSlug`.
+ */
+function ModelColumn({
+  provider,
+  models,
+  allModels,
+  selectedModel,
+  currentModel,
+  currentProviderSlug,
+  onSelect,
+  onConfirm,
+}: {
+  provider: ModelOptionProvider | null;
+  models: string[];
+  allModels: string[];
+  selectedModel: string;
+  currentModel: string;
+  currentProviderSlug: string;
+  onSelect(model: string): void;
+  onConfirm(model: string): void;
+}) {
+  if (!provider) {
+    return (
+      <div className="overflow-y-auto">
+        <div className="p-4 text-xs text-muted-foreground italic">
+          pick a provider →
+        </div>
+      </div>
+    );
+  }
+
+  return (
+    <div className="overflow-y-auto">
+      {provider.warning && (
+        <div className="p-3 text-xs text-destructive border-b border-border">
+          {provider.warning}
+        </div>
+      )}
+
+      {models.length === 0 ? (
+        <div className="p-4 text-xs text-muted-foreground italic">
+          {allModels.length
+            ? "no models match your filter"
+            : "no models listed for this provider"}
+        </div>
+      ) : (
+        models.map((m) => {
+          // `active` = highlighted selection in this dialog;
+          // `isCurrent` = the model the session is already using.
+          const active = m === selectedModel;
+          const isCurrent =
+            m === currentModel && provider.slug === currentProviderSlug;
+
+          return (
+            <button
+              key={m}
+              type="button"
+              onClick={() => onSelect(m)}
+              onDoubleClick={() => onConfirm(m)}
+              className={`w-full text-left px-3 py-1.5 text-xs font-mono transition-colors cursor-pointer flex items-center gap-2 ${
+                active
+                  ? "bg-primary/15 text-foreground"
+                  : "text-muted-foreground hover:text-foreground hover:bg-muted/40"
+              }`}
+            >
+              <Check
+                className={`h-3 w-3 shrink-0 ${active ? "text-primary" : "text-transparent"}`}
+              />
+              <span className="flex-1 truncate">{m}</span>
+              {isCurrent && <CurrentTag />}
+            </button>
+          );
+        })
+      )}
+    </div>
+  );
+}
+
+/** Small uppercase "current" badge used on the active provider / model row. */
+function CurrentTag() {
+  const badgeClass =
+    "text-[0.6rem] uppercase tracking-wider text-primary/80 shrink-0";
+
+  return <span className={badgeClass}>current</span>;
+}
--- a/web/src/components/SlashPopover.tsx
+++ b/web/src/components/SlashPopover.tsx
@ -0,0 +1,174 @@
+import type { GatewayClient } from "@/lib/gatewayClient";
+import { ChevronRight } from "lucide-react";
+import {
+  forwardRef,
+  useCallback,
+  useEffect,
+  useImperativeHandle,
+  useRef,
+  useState,
+} from "react";
+
+/**
+ * Slash-command autocomplete popover, rendered above the composer in ChatPage.
+ * Mirrors the completion UX of the Ink TUI — type `/`, see matching commands,
+ * arrow keys or click to select, Tab to apply, Enter to submit.
+ *
+ * The parent owns all keyboard handling via `ref.handleKey`, which returns
+ * true when the popover consumed the event, so the composer's Enter/arrow
+ * logic stays in one place.
+ */
+
+/** One completion candidate returned by the gateway's `complete.slash` call. */
+export interface CompletionItem {
+  /** Label rendered in the popover row. */
+  display: string;
+  /** Text appended to the kept prefix of the input when the item is applied. */
+  text: string;
+  /** Optional secondary text rendered right-aligned in the row. */
+  meta?: string;
+}
+
+export interface SlashPopoverHandle {
+  /** Returns true if the key was consumed by the popover. */
+  handleKey(e: React.KeyboardEvent<HTMLTextAreaElement>): boolean;
+}
+
+interface Props {
+  /** Current composer text; completions are only fetched when it starts with "/". */
+  input: string;
+  /** Gateway client used for `complete.slash`; no fetch is issued while null. */
+  gw: GatewayClient | null;
+  /** Called with the rewritten input when a completion is applied. */
+  onApply(nextInput: string): void;
+}
+
+/** Shape of the `complete.slash` result as consumed by the popover. */
+interface CompletionResponse {
+  /** Candidates to show; treated as an empty list when absent. */
+  items?: CompletionItem[];
+  /** Index into the input from which the applied text replaces; defaults to 1 when absent. */
+  replace_from?: number;
+}
+
+/** Delay (ms) between an input change and the completion request being sent. */
+const DEBOUNCE_MS = 60;
+
+/**
+ * Slash-command completion popover.
+ *
+ * Fetches candidates from the gateway (`complete.slash`) after a short
+ * debounce whenever `input` starts with "/", renders them as a listbox, and
+ * exposes `handleKey` through the forwarded ref so the parent can route
+ * keyboard events to it. Renders nothing while there are no items or the
+ * input no longer starts with "/".
+ */
+export const SlashPopover = forwardRef<SlashPopoverHandle, Props>(
+  function SlashPopover({ input, gw, onApply }, ref) {
+    const [items, setItems] = useState<CompletionItem[]>([]);
+    const [selected, setSelected] = useState(0);
+    const [replaceFrom, setReplaceFrom] = useState(1);
+    // Last slash input a fetch was scheduled for. Used both to skip duplicate
+    // fetches and to discard responses that arrive after the input moved on.
+    const lastInputRef = useRef<string>("");
+
+    // Debounced completion fetch. We never clear `items` in the effect body
+    // (doing so would flag react-hooks/set-state-in-effect); instead the
+    // render guard below hides stale items once the input stops matching.
+    useEffect(() => {
+      // NOTE: despite the name, no trimming happens here — this is `input`
+      // with a null/undefined fallback.
+      const trimmed = input ?? "";
+
+      // Skip when there is no gateway, the input is not a slash command, or a
+      // fetch for this exact input was already scheduled. Leaving slash mode
+      // resets the ref so typing the same command again refetches.
+      // NOTE(review): the cleanup below cancels the timer but leaves the ref
+      // set, so a re-run with an unchanged input (e.g. a new `gw` identity)
+      // does not reschedule a cancelled fetch — confirm this is acceptable.
+      if (!gw || !trimmed.startsWith("/") || trimmed === lastInputRef.current) {
+        if (!trimmed.startsWith("/")) lastInputRef.current = "";
+        return;
+      }
+      lastInputRef.current = trimmed;
+
+      const timer = window.setTimeout(async () => {
+        if (lastInputRef.current !== trimmed) return;
+        try {
+          const r = await gw.request<CompletionResponse>("complete.slash", {
+            text: trimmed,
+          });
+          // The input may have changed while the request was in flight.
+          if (lastInputRef.current !== trimmed) return;
+          setItems(r?.items ?? []);
+          setReplaceFrom(r?.replace_from ?? 1);
+          setSelected(0);
+        } catch {
+          // A failed lookup just hides the popover for the current input.
+          if (lastInputRef.current === trimmed) setItems([]);
+        }
+      }, DEBOUNCE_MS);
+
+      return () => window.clearTimeout(timer);
+    }, [input, gw]);
+
+    // Keep the input up to `replaceFrom` and append the chosen item's text.
+    const apply = useCallback(
+      (item: CompletionItem) => {
+        onApply(input.slice(0, replaceFrom) + item.text);
+      },
+      [input, replaceFrom, onApply],
+    );
+
+    // Only consume keys when the popover is actually visible. Stale items from
+    // a previous slash prefix are ignored once the user deletes the "/".
+    const visible = items.length > 0 && input.startsWith("/");
+
+    // Keyboard contract: ArrowDown/ArrowUp move the selection (wrapping),
+    // Tab applies the selected item, Escape dismisses the list. Every other
+    // key (including Enter) returns false so the parent handles it.
+    useImperativeHandle(
+      ref,
+      () => ({
+        handleKey: (e) => {
+          if (!visible) return false;
+
+          switch (e.key) {
+            case "ArrowDown":
+              e.preventDefault();
+              setSelected((s) => (s + 1) % items.length);
+              return true;
+
+            case "ArrowUp":
+              e.preventDefault();
+              setSelected((s) => (s - 1 + items.length) % items.length);
+              return true;
+
+            case "Tab": {
+              e.preventDefault();
+              const item = items[selected];
+              if (item) apply(item);
+              return true;
+            }
+
+            case "Escape":
+              e.preventDefault();
+              setItems([]);
+              return true;
+
+            default:
+              return false;
+          }
+        },
+      }),
+      [visible, items, selected, apply],
+    );
+
+    if (!visible) return null;
+
+    return (
+      <div
+        className="absolute bottom-full left-0 right-0 mb-2 max-h-64 overflow-y-auto rounded-md border border-border bg-popover shadow-xl text-sm"
+        role="listbox"
+      >
+        {items.map((it, i) => {
+          const active = i === selected;
+
+          return (
+            <button
+              key={`${it.text}-${i}`}
+              type="button"
+              role="option"
+              aria-selected={active}
+              onMouseEnter={() => setSelected(i)}
+              onClick={() => apply(it)}
+              className={`w-full flex items-center gap-2 px-3 py-1.5 text-left cursor-pointer transition-colors ${
+                active
+                  ? "bg-primary/10 text-foreground"
+                  : "text-muted-foreground hover:bg-muted/60"
+              }`}
+            >
+              <ChevronRight
+                className={`h-3 w-3 shrink-0 ${active ? "text-primary" : "text-transparent"}`}
+              />
+
+              <span className="font-mono text-xs shrink-0 truncate">
+                {it.display}
+              </span>
+
+              {it.meta && (
+                <span className="text-[0.7rem] text-muted-foreground/70 truncate ml-auto">
+                  {it.meta}
+                </span>
+              )}
+            </button>
+          );
+        })}
+      </div>
+    );
+  },
+);
--- a/web/src/components/ToolCall.tsx
+++ b/web/src/components/ToolCall.tsx
@ -0,0 +1,228 @@
+import {
+  AlertCircle,
+  Check,
+  ChevronDown,
+  ChevronRight,
+  Zap,
+} from "lucide-react";
+import { useEffect, useState } from "react";
+
+/**
+ * Expandable tool call row — the web equivalent of Ink's ToolTrail node.
+ *
+ * Renders one `tool.start` + `tool.complete` pair (plus any `tool.progress`
+ * in between) as a single collapsible item in the transcript:
+ *
+ *   ▸ ● read_file(path=/foo)                         2.3s
+ *
+ * Click the header to reveal a preformatted body with context (args), the
+ * streaming preview (while running), and the final summary or error. Error
+ * rows auto-expand so failures aren't silently collapsed.
+ */
+
+/** Transcript entry describing one tool invocation, as rendered by `ToolCall`. */
+export interface ToolEntry {
+  kind: "tool";
+  // NOTE(review): presumably `id` is the transcript row key and `tool_id` the
+  // gateway's id for the call — neither is read in this file; confirm against
+  // the code that builds these entries.
+  id: string;
+  tool_id: string;
+  /** Tool name shown in the row header. */
+  name: string;
+  /** Shown truncated in the header and in full in the "context" section. */
+  context?: string;
+  /** Shown in the "streaming" section, only while `status` is "running". */
+  preview?: string;
+  /** Shown in the "result" section. */
+  summary?: string;
+  /** Error text shown in the "error" section. */
+  error?: string;
+  /** Diff text, colorized line by line in the "diff" section. */
+  inline_diff?: string;
+  status: "running" | "done" | "error";
+  /** Start time in epoch ms; 0 means "no timestamps" and hides the elapsed badge. */
+  startedAt: number;
+  /** Completion time in epoch ms; while absent, elapsed is measured against the component's ticking clock. */
+  completedAt?: number;
+}
+
+/** Border/background classes for the outer tool row, keyed by status. */
+const STATUS_TONE: Record<ToolEntry["status"], string> = {
+  running: "border-primary/40 bg-primary/[0.04]",
+  done: "border-border bg-muted/20",
+  error: "border-destructive/50 bg-destructive/[0.04]",
+};
+
+/** Color class for the Zap icon in the row header, keyed by status. */
+const BULLET_TONE: Record<ToolEntry["status"], string> = {
+  running: "text-primary",
+  done: "text-primary/80",
+  error: "text-destructive",
+};
+
+/** Refresh interval (ms) for the live elapsed label while a tool is running. */
+const TICK_MS = 500;
+
+/**
+ * Collapsible transcript row for a single tool invocation.
+ *
+ * The header shows the tool name, its context, a status indicator and (when
+ * timestamps are available) the elapsed time. The body, shown when expanded
+ * and when there is anything to show, renders the context, streaming preview
+ * (running tools only), inline diff, result summary and error sections.
+ */
+export function ToolCall({ tool }: { tool: ToolEntry }) {
+  // `open` is derived: errors default-expanded, everything else collapsed.
+  // `null` means "follow the default"; any explicit bool is the user's override.
+  // This lets a running tool flip to expanded automatically when it errors,
+  // without mirroring state in an effect.
+  const [userOverride, setUserOverride] = useState<boolean | null>(null);
+  const open = userOverride ?? tool.status === "error";
+
+  // Tick `now` while the tool is running so the elapsed label updates live.
+  const [now, setNow] = useState(() => Date.now());
+  useEffect(() => {
+    if (tool.status !== "running") return;
+    const id = window.setInterval(() => setNow(() => Date.now()), TICK_MS);
+    return () => window.clearInterval(id);
+  }, [tool.status]);
+
+  // Historical tools (hydrated from session.resume) signal missing timestamps
+  // with `startedAt === 0`; we hide the elapsed badge for those rather than
+  // rendering a misleading "0ms".
+  const hasTimestamps = tool.startedAt > 0;
+  const elapsed = hasTimestamps
+    ? fmtElapsed((tool.completedAt ?? now) - tool.startedAt)
+    : null;
+
+  // The header is only clickable (and shows a chevron) when at least one
+  // body section would render.
+  const hasBody = !!(
+    tool.context ||
+    tool.preview ||
+    tool.summary ||
+    tool.error ||
+    tool.inline_diff
+  );
+
+  const Chevron = open ? ChevronDown : ChevronRight;
+
+  return (
+    <div
+      className={`rounded-md border overflow-hidden ${STATUS_TONE[tool.status]}`}
+    >
+      <button
+        type="button"
+        onClick={() => setUserOverride(!open)}
+        disabled={!hasBody}
+        aria-expanded={open}
+        className="w-full flex items-center gap-2 px-2.5 py-1.5 text-left text-xs hover:bg-foreground/2 disabled:cursor-default cursor-pointer transition-colors"
+      >
+        {hasBody ? (
+          <Chevron className="h-3 w-3 shrink-0 text-muted-foreground" />
+        ) : (
+          <span className="w-3 shrink-0" />
+        )}
+
+        <Zap className={`h-3 w-3 shrink-0 ${BULLET_TONE[tool.status]}`} />
+
+        <span className="font-mono font-medium shrink-0">{tool.name}</span>
+
+        <span className="font-mono text-muted-foreground/80 truncate min-w-0 flex-1">
+          {tool.context ?? ""}
+        </span>
+
+        {tool.status === "running" && (
+          <span
+            className="inline-block h-2 w-2 rounded-full bg-primary animate-pulse shrink-0"
+            title="running"
+          />
+        )}
+        {tool.status === "error" && (
+          <AlertCircle
+            className="h-3 w-3 shrink-0 text-destructive"
+            aria-label="error"
+          />
+        )}
+        {tool.status === "done" && (
+          <Check
+            className="h-3 w-3 shrink-0 text-primary/80"
+            aria-label="done"
+          />
+        )}
+
+        {elapsed && (
+          <span className="font-mono text-[0.65rem] text-muted-foreground tabular-nums shrink-0">
+            {elapsed}
+          </span>
+        )}
+      </button>
+
+      {open && hasBody && (
+        <div className="border-t border-border/60 px-3 py-2 space-y-2 text-xs font-mono">
+          {tool.context && <Section label="context">{tool.context}</Section>}
+
+          {tool.preview && tool.status === "running" && (
+            <Section label="streaming">
+              {tool.preview}
+              <span className="inline-block w-1.5 h-3 align-middle bg-foreground/40 ml-0.5 animate-pulse" />
+            </Section>
+          )}
+
+          {tool.inline_diff && (
+            <Section label="diff">
+              <pre className="whitespace-pre overflow-x-auto text-[0.7rem] leading-snug">
+                {colorizeDiff(tool.inline_diff)}
+              </pre>
+            </Section>
+          )}
+
+          {tool.summary && (
+            <Section label="result">
+              <span className="text-foreground/90 whitespace-pre-wrap">
+                {tool.summary}
+              </span>
+            </Section>
+          )}
+
+          {tool.error && (
+            <Section label="error" tone="error">
+              <span className="text-destructive whitespace-pre-wrap">
+                {tool.error}
+              </span>
+            </Section>
+          )}
+        </div>
+      )}
+    </div>
+  );
+}
+
+/**
+ * One labelled row inside an expanded tool body: a fixed-width uppercase
+ * label on the left and arbitrary content on the right. `tone="error"`
+ * switches the label to the destructive color.
+ */
+function Section(props: {
+  label: string;
+  children: React.ReactNode;
+  tone?: "error";
+}) {
+  const { label, children, tone } = props;
+
+  const labelTone =
+    tone === "error" ? "text-destructive/80" : "text-muted-foreground/60";
+  const labelClass = `uppercase tracking-wider text-[0.6rem] shrink-0 w-14 pt-0.5 ${labelTone}`;
+
+  return (
+    <div className="flex gap-3">
+      <span className={labelClass}>{label}</span>
+      <div className="flex-1 min-w-0 text-muted-foreground">{children}</div>
+    </div>
+  );
+}
+
+/**
+ * Format an elapsed duration (in milliseconds) for the tool row badge.
+ *
+ *   under 1s    -> "<n>ms"      whole milliseconds
+ *   under 10s   -> "<n.n>s"     one decimal place
+ *   under 1min  -> "<n>s"       whole seconds
+ *   1min and up -> "<m>m <s>s"  seconds omitted when zero
+ *
+ * Negative or non-finite input (clock skew, bad timestamps) is treated as 0.
+ * Rounding is applied before the bucket is chosen, so a value never renders
+ * past its own unit ("1000ms", "10.0s", "60s", "1m 60s").
+ */
+function fmtElapsed(ms: number): string {
+  const clamped = Number.isFinite(ms) ? Math.max(0, ms) : 0;
+
+  const wholeMs = Math.round(clamped);
+  if (wholeMs < 1000) return `${wholeMs}ms`;
+
+  const sec = clamped / 1000;
+  // 9.95s and above would print as "10.0s"; hand those to the whole-second bucket.
+  if (sec < 9.95) return `${sec.toFixed(1)}s`;
+
+  // Derive minutes and seconds from one rounded total so they always agree.
+  const totalSec = Math.round(sec);
+  if (totalSec < 60) return `${totalSec}s`;
+
+  const m = Math.floor(totalSec / 60);
+  const s = totalSec % 60;
+  return s ? `${m}m ${s}s` : `${m}m`;
+}
+
+/**
+ * Turn unified-diff text into one `<div>` per line, each carrying the color
+ * class chosen by `diffLineClass`. Empty lines render a non-breaking space so
+ * they keep their height.
+ */
+function colorizeDiff(diff: string): React.ReactNode {
+  const rows: React.ReactNode[] = [];
+
+  diff.split("\n").forEach((text, idx) => {
+    rows.push(
+      <div key={idx} className={diffLineClass(text)}>
+        {text || "\u00A0"}
+      </div>,
+    );
+  });
+
+  return rows;
+}
+
+/**
+ * Pick the text color class for a single unified-diff line: additions,
+ * removals and hunk headers are colored, while file headers (`+++` / `---`)
+ * and everything else stay muted.
+ */
+function diffLineClass(line: string): string {
+  const head = line.slice(0, 3);
+
+  switch (line.charAt(0)) {
+    case "+":
+      if (head !== "+++") return "text-emerald-500 dark:text-emerald-400";
+      break;
+    case "-":
+      if (head !== "---") return "text-destructive";
+      break;
+    case "@":
+      if (line.startsWith("@@")) return "text-primary";
+      break;
+  }
+
+  return "text-muted-foreground/80";
+}
--- a/web/src/contexts/PageHeaderProvider.tsx
+++ b/web/src/contexts/PageHeaderProvider.tsx
@ -34,6 +34,8 @@ export function PageHeaderProvider({
  );
  const displayTitle = titleOverride ?? defaultTitle;

+  const isChatRoute = pathname === "/chat" || pathname === "/chat/";
+
  const value = useMemo(
    () => ({
      setAfterTitle,
@ -59,8 +61,10 @@ export function PageHeaderProvider({
        >
          <div
            className={cn(
-              "flex h-full w-full min-w-0 flex-1 flex-col justify-center gap-2",
-              "px-3 py-2 sm:flex-row sm:items-center sm:gap-3 sm:px-6 sm:py-0",
+              "flex h-full w-full min-w-0 flex-1 gap-2 px-3 py-2 sm:gap-3 sm:px-6 sm:py-0",
+              isChatRoute
+                ? "flex-row items-center"
+                : "flex-col justify-center sm:flex-row sm:items-center",
            )}
          >
            <div className="flex min-w-0 flex-1 items-center gap-2 sm:gap-3">
@ -74,7 +78,12 @@ export function PageHeaderProvider({
            </div>

            {end ? (
-              <div className="flex w-full min-w-0 justify-end sm:max-w-md sm:flex-1">
+              <div
+                className={cn(
+                  "flex min-w-0 justify-end sm:max-w-md sm:flex-1",
+                  isChatRoute ? "w-auto shrink-0" : "w-full",
+                )}
+              >
                {end}
              </div>
            ) : null}
@ -84,7 +93,9 @@ export function PageHeaderProvider({
        <main
          className={cn(
            "min-h-0 w-full min-w-0 flex-1 flex flex-col",
-            "overflow-y-auto overflow-x-hidden [scrollbar-gutter:stable]",
+            isChatRoute
+              ? "overflow-hidden"
+              : "overflow-y-auto overflow-x-hidden [scrollbar-gutter:stable]",
          )}
        >
          {children}
--- a/web/src/i18n/en.ts
+++ b/web/src/i18n/en.ts
@ -53,6 +53,7 @@ export const en: Translations = {
    brand: "Hermes Agent",
    brandShort: "HA",
    closeNavigation: "Close navigation",
+    closeModelTools: "Close model and tools",
    footer: {
      org: "Nous Research",
    },
@ -67,6 +68,7 @@ export const en: Translations = {
    },
    nav: {
      analytics: "Analytics",
+      chat: "Chat",
      config: "Config",
      cron: "Cron",
      documentation: "Documentation",
@ -75,6 +77,8 @@ export const en: Translations = {
      sessions: "Sessions",
      skills: "Skills",
    },
+    modelToolsSheetSubtitle: "& tools",
+    modelToolsSheetTitle: "Model",
    navigation: "Navigation",
    openDocumentation: "Open documentation in a new tab",
    openNavigation: "Open navigation",
@ -131,6 +135,7 @@ export const en: Translations = {
      "This permanently removes the conversation and all of its messages. This cannot be undone.",
    sessionDeleted: "Session deleted",
    failedToDelete: "Failed to delete session",
+    resumeInChat: "Resume in Chat",
    previousPage: "Previous page",
    nextPage: "Next page",
    roles: {
--- a/web/src/i18n/types.ts
+++ b/web/src/i18n/types.ts
@ -53,6 +53,7 @@ export interface Translations {
    brand: string;
    brandShort: string;
    closeNavigation: string;
+    closeModelTools: string;
    footer: {
      org: string;
    };
@ -67,6 +68,7 @@ export interface Translations {
    };
    nav: {
      analytics: string;
+      chat: string;
      config: string;
      cron: string;
      documentation: string;
@ -75,6 +77,8 @@ export interface Translations {
      sessions: string;
      skills: string;
    };
+    modelToolsSheetSubtitle: string;
+    modelToolsSheetTitle: string;
    navigation: string;
    openDocumentation: string;
    openNavigation: string;
@ -132,6 +136,7 @@ export interface Translations {
    confirmDeleteMessage: string;
    sessionDeleted: string;
    failedToDelete: string;
+    resumeInChat: string;
    previousPage: string;
    nextPage: string;
    roles: {
--- a/web/src/i18n/zh.ts
+++ b/web/src/i18n/zh.ts
@ -52,6 +52,7 @@ export const zh: Translations = {
    brand: "Hermes Agent",
    brandShort: "HA",
    closeNavigation: "关闭导航",
+    closeModelTools: "关闭模型与工具",
    footer: {
      org: "Nous Research",
    },
@ -66,6 +67,7 @@ export const zh: Translations = {
    },
    nav: {
      analytics: "分析",
+      chat: "对话",
      config: "配置",
      cron: "定时任务",
      documentation: "文档",
@ -74,6 +76,8 @@ export const zh: Translations = {
      sessions: "会话",
      skills: "技能",
    },
+    modelToolsSheetSubtitle: "与工具",
+    modelToolsSheetTitle: "模型",
    navigation: "导航",
    openDocumentation: "在新标签页中打开文档",
    openNavigation: "打开导航",
@ -129,6 +133,7 @@ export const zh: Translations = {
    confirmDeleteMessage: "此操作将永久删除对话及其所有消息，无法恢复。",
    sessionDeleted: "会话已删除",
    failedToDelete: "删除会话失败",
+    resumeInChat: "在对话中继续",
    previousPage: "上一页",
    nextPage: "下一页",
    roles: {
--- a/web/src/index.css
+++ b/web/src/index.css
@ -5,6 +5,36 @@
   Tailwind's JIT purge. */
@source '../node_modules/@nous-research/ui/dist';

+/* ------------------------------------------------------------------ */
+/* JetBrains Mono — bundled for the embedded TUI (/chat tab).          */
+/* Gives the terminal a proper monospace font even on systems where    */
+/* the user doesn't have one installed locally; xterm.js picks it up   */
+/* via ChatPage's `fontFamily` option.                                 */
+/* Apache-2.0.                                                         */
+/* ------------------------------------------------------------------ */
+
+@font-face {
+  font-family: 'JetBrains Mono';
+  font-style: normal;
+  font-weight: 400;
+  font-display: swap;
+  src: url('/fonts-terminal/JetBrainsMono-Regular.woff2') format('woff2');
+}
+@font-face {
+  font-family: 'JetBrains Mono';
+  font-style: normal;
+  font-weight: 700;
+  font-display: swap;
+  src: url('/fonts-terminal/JetBrainsMono-Bold.woff2') format('woff2');
+}
+@font-face {
+  font-family: 'JetBrains Mono';
+  font-style: italic;
+  font-weight: 400;
+  font-display: swap;
+  src: url('/fonts-terminal/JetBrainsMono-Italic.woff2') format('woff2');
+}
+
 /* ------------------------------------------------------------------ */
 /* Hermes Agent — Nous DS with the LENS_0 (Hermes teal) lens applied   */
 /* statically. Mirrors nousnet-web/(hermes-agent)/layout.tsx so the    */
--- a/web/src/lib/dashboard-flags.ts
+++ b/web/src/lib/dashboard-flags.ts
@ -0,0 +1,15 @@
+declare global {
+  interface Window {
+    /** Set true by the server only for `hermes dashboard --tui` (or HERMES_DASHBOARD_TUI=1). */
+    __HERMES_DASHBOARD_EMBEDDED_CHAT__?: boolean;
+    /** @deprecated Older injected name; treated as on when true. */
+    __HERMES_DASHBOARD_TUI__?: boolean;
+  }
+}
+
+/**
+ * Reports whether the embedded TUI Chat was enabled for this dashboard page.
+ * True when either the current flag or the deprecated one is exactly `true`
+ * on `window`; always false when there is no `window`.
+ */
+export function isDashboardEmbeddedChatEnabled(): boolean {
+  if (typeof window === "undefined") {
+    return false;
+  }
+
+  const {
+    __HERMES_DASHBOARD_EMBEDDED_CHAT__: currentFlag,
+    __HERMES_DASHBOARD_TUI__: legacyFlag,
+  } = window;
+
+  return currentFlag === true || legacyFlag === true;
+}
--- a/web/src/lib/gatewayClient.ts
+++ b/web/src/lib/gatewayClient.ts
@ -0,0 +1,236 @@
+/**
+ * Browser WebSocket client for the tui_gateway JSON-RPC protocol.
+ *
+ * Speaks the exact same newline-delimited JSON-RPC dialect that the Ink TUI
+ * drives over stdio. The server-side transport abstraction
+ * (tui_gateway/transport.py + ws.py) routes the same dispatcher's writes
+ * onto either stdout or a WebSocket depending on how the client connected.
+ *
+ *   const gw = new GatewayClient()
+ *   await gw.connect()
+ *   const { session_id } = await gw.request<{ session_id: string }>("session.create")
+ *   gw.on("message.delta", (ev) => console.log(ev.payload?.text))
+ *   await gw.request("prompt.submit", { session_id, text: "hi" })
+ */
+
+/**
+ * Event type names delivered by the gateway. The literal members document the
+ * known events; the trailing `(string & {})` keeps the type open to any other
+ * string while still letting editors suggest the known names.
+ */
+export type GatewayEventName =
+  | "gateway.ready"
+  | "session.info"
+  | "message.start"
+  | "message.delta"
+  | "message.complete"
+  | "thinking.delta"
+  | "reasoning.delta"
+  | "reasoning.available"
+  | "status.update"
+  | "tool.start"
+  | "tool.progress"
+  | "tool.complete"
+  | "tool.generating"
+  | "clarify.request"
+  | "approval.request"
+  | "sudo.request"
+  | "secret.request"
+  | "background.complete"
+  | "btw.complete"
+  | "error"
+  | "skin.changed"
+  | (string & {});
+
+/**
+ * An event notification as handed to listeners — the `params` object of a
+ * JSON-RPC message whose `method` is "event".
+ */
+export interface GatewayEvent<P = unknown> {
+  /** Event name; listeners are looked up by this value. */
+  type: GatewayEventName;
+  session_id?: string;
+  /** Event-specific data; its shape depends on `type`. */
+  payload?: P;
+}
+
+/**
+ * Lifecycle of a GatewayClient connection:
+ *   idle       — initial state, `connect()` not called yet
+ *   connecting — `connect()` in progress
+ *   open       — socket opened; requests may be sent
+ *   closed     — the socket's close event fired
+ *   error      — no session token was available, or the socket errored while connecting
+ */
+export type ConnectionState =
+  | "idle"
+  | "connecting"
+  | "open"
+  | "closed"
+  | "error";
+
+/** Bookkeeping for one in-flight request, keyed by request id in the client. */
+interface Pending {
+  resolve: (v: unknown) => void;
+  reject: (e: Error) => void;
+  /** Timeout handle; cleared when the request settles or the socket closes. */
+  timer: ReturnType<typeof setTimeout>;
+}
+
+/** Default per-request timeout (ms) used by `GatewayClient.request`. */
+const DEFAULT_REQUEST_TIMEOUT_MS = 120_000;
+
+/** Wildcard listener key: subscribe to every event regardless of type. */
+const ANY = "*";
+
+/**
+ * WebSocket JSON-RPC client for the gateway.
+ *
+ * Responsibilities:
+ *  - open the socket at `/api/ws` with the session token as a query parameter,
+ *  - correlate responses with pending requests by `id`,
+ *  - fan out `method: "event"` notifications to type-specific and wildcard
+ *    listeners,
+ *  - expose the connection state and notify state listeners on change.
+ *
+ * Socket event handlers only act while their socket is still the client's
+ * current one, so a socket that was closed locally (or replaced by a later
+ * `connect()`) can no longer change state or reject newer requests.
+ */
+export class GatewayClient {
+  private ws: WebSocket | null = null;
+  private reqId = 0;
+  private pending = new Map<string, Pending>();
+  private listeners = new Map<string, Set<(ev: GatewayEvent) => void>>();
+  private _state: ConnectionState = "idle";
+  private stateListeners = new Set<(s: ConnectionState) => void>();
+
+  /** Current connection state. */
+  get state(): ConnectionState {
+    return this._state;
+  }
+
+  /** Update the state and notify state listeners; no-op when unchanged. */
+  private setState(s: ConnectionState) {
+    if (this._state === s) return;
+    this._state = s;
+    for (const cb of this.stateListeners) cb(s);
+  }
+
+  /**
+   * Subscribe to connection state changes. The callback is invoked
+   * immediately with the current state. Returns an unsubscribe function.
+   */
+  onState(cb: (s: ConnectionState) => void): () => void {
+    this.stateListeners.add(cb);
+    cb(this._state);
+    return () => this.stateListeners.delete(cb);
+  }
+
+  /** Subscribe to a specific event type. Returns an unsubscribe function. */
+  on<P = unknown>(
+    type: GatewayEventName,
+    cb: (ev: GatewayEvent<P>) => void,
+  ): () => void {
+    let set = this.listeners.get(type);
+    if (!set) {
+      set = new Set();
+      this.listeners.set(type, set);
+    }
+    set.add(cb as (ev: GatewayEvent) => void);
+    return () => set!.delete(cb as (ev: GatewayEvent) => void);
+  }
+
+  /** Subscribe to every event (fires after type-specific listeners). */
+  onAny(cb: (ev: GatewayEvent) => void): () => void {
+    return this.on(ANY as GatewayEventName, cb);
+  }
+
+  /**
+   * Open the WebSocket. Resolves once the socket is open; rejects when no
+   * session token is available or the connection fails. A call made while
+   * already open or connecting returns immediately.
+   *
+   * @param token Session token; defaults to `window.__HERMES_SESSION_TOKEN__`.
+   */
+  async connect(token?: string): Promise<void> {
+    if (this._state === "open" || this._state === "connecting") return;
+    this.setState("connecting");
+
+    const resolved = token ?? window.__HERMES_SESSION_TOKEN__ ?? "";
+    if (!resolved) {
+      this.setState("error");
+      throw new Error(
+        "Session token not available — page must be served by the Hermes dashboard",
+      );
+    }
+
+    const scheme = location.protocol === "https:" ? "wss:" : "ws:";
+    const ws = new WebSocket(
+      `${scheme}//${location.host}/api/ws?token=${encodeURIComponent(resolved)}`,
+    );
+    this.ws = ws;
+
+    // Register message + close BEFORE awaiting open — the server emits
+    // `gateway.ready` immediately after accept, so a listener attached
+    // after the open promise resolves can race past it and drop the
+    // initial skin payload.
+    ws.addEventListener("message", (ev) => {
+      // Frames from a socket we already closed or replaced are dropped.
+      if (this.ws !== ws) return;
+
+      // Only the parse is guarded: a malformed frame is ignored, but errors
+      // raised while handling a well-formed one must not be mistaken for it.
+      let msg: unknown;
+      try {
+        msg = JSON.parse(ev.data);
+      } catch {
+        return; /* malformed frame — ignore */
+      }
+      if (msg === null || typeof msg !== "object") return;
+
+      this.dispatch(msg as Record<string, unknown>);
+    });
+
+    ws.addEventListener("close", () => {
+      // A locally closed socket was already accounted for in close(); a
+      // replaced one must not touch the newer connection's state.
+      if (this.ws !== ws) return;
+      this.ws = null;
+      this.setState("closed");
+      this.rejectAllPending(new Error("WebSocket closed"));
+    });
+
+    await new Promise<void>((resolve, reject) => {
+      const onOpen = () => {
+        ws.removeEventListener("error", onError);
+        if (this.ws === ws) this.setState("open");
+        resolve();
+      };
+      const onError = () => {
+        ws.removeEventListener("open", onOpen);
+        if (this.ws === ws) this.setState("error");
+        // Always settle the promise, even for a socket closed mid-connect.
+        reject(new Error("WebSocket connection failed"));
+      };
+      ws.addEventListener("open", onOpen, { once: true });
+      ws.addEventListener("error", onError, { once: true });
+    });
+  }
+
+  /**
+   * Close the connection. The state moves to "closed" and all pending
+   * requests are rejected right away instead of waiting for the socket's
+   * asynchronous close event, so a following `connect()` is not skipped
+   * because the state still reads "open". No-op when there is no socket.
+   */
+  close() {
+    const ws = this.ws;
+    if (!ws) return;
+
+    this.ws = null;
+    ws.close();
+    this.setState("closed");
+    this.rejectAllPending(new Error("WebSocket closed"));
+  }
+
+  /**
+   * Route one parsed frame: settle the matching pending request when the
+   * frame carries a known `id`, otherwise deliver `method: "event"` frames
+   * to listeners. Anything else is ignored.
+   */
+  private dispatch(msg: Record<string, unknown>) {
+    const id = msg.id as string | undefined;
+
+    if (id !== undefined && this.pending.has(id)) {
+      const p = this.pending.get(id)!;
+      this.pending.delete(id);
+      clearTimeout(p.timer);
+
+      const err = msg.error as { message?: string } | undefined;
+      if (err) p.reject(new Error(err.message ?? "request failed"));
+      else p.resolve(msg.result);
+      return;
+    }
+
+    if (msg.method !== "event") return;
+
+    const params = (msg.params ?? {}) as GatewayEvent;
+    if (typeof params.type !== "string") return;
+
+    this.emit(this.listeners.get(params.type), params);
+    this.emit(this.listeners.get(ANY), params);
+  }
+
+  /**
+   * Invoke every listener in `set` with `ev`. Each call is isolated so one
+   * throwing listener is reported and cannot starve the others.
+   */
+  private emit(
+    set: Set<(ev: GatewayEvent) => void> | undefined,
+    ev: GatewayEvent,
+  ) {
+    if (!set) return;
+    for (const cb of set) {
+      try {
+        cb(ev);
+      } catch (err) {
+        console.error(`[gateway] listener for "${ev.type}" threw`, err);
+      }
+    }
+  }
+
+  /** Reject and forget every in-flight request, clearing their timers. */
+  private rejectAllPending(err: Error) {
+    for (const p of this.pending.values()) {
+      clearTimeout(p.timer);
+      p.reject(err);
+    }
+    this.pending.clear();
+  }
+
+  /** Send a JSON-RPC request. Rejects on error response or timeout. */
+  request<T = unknown>(
+    method: string,
+    params: Record<string, unknown> = {},
+    timeoutMs = DEFAULT_REQUEST_TIMEOUT_MS,
+  ): Promise<T> {
+    if (!this.ws || this._state !== "open") {
+      return Promise.reject(
+        new Error(`gateway not connected (state=${this._state})`),
+      );
+    }
+
+    // The "w" prefix is simply part of the id scheme; ids are unique per client.
+    const id = `w${++this.reqId}`;
+
+    return new Promise<T>((resolve, reject) => {
+      const timer = setTimeout(() => {
+        if (this.pending.delete(id)) {
+          reject(new Error(`request timed out: ${method}`));
+        }
+      }, timeoutMs);
+
+      this.pending.set(id, {
+        resolve: (v) => resolve(v as T),
+        reject,
+        timer,
+      });
+
+      try {
+        this.ws!.send(JSON.stringify({ jsonrpc: "2.0", id, method, params }));
+      } catch (e) {
+        clearTimeout(timer);
+        this.pending.delete(id);
+        reject(e instanceof Error ? e : new Error(String(e)));
+      }
+    });
+  }
+}
+
+declare global {
+  interface Window {
+    /**
+     * Session token used to authenticate the gateway WebSocket when
+     * `connect()` is called without an explicit token.
+     * NOTE(review): presumably injected into the page by the dashboard
+     * server — confirm against the server-side template.
+     */
+    __HERMES_SESSION_TOKEN__?: string;
+  }
+}
--- a/web/src/lib/resolve-page-title.ts
+++ b/web/src/lib/resolve-page-title.ts
@ -1,6 +1,7 @@
 import type { Translations } from "@/i18n/types";

 const BUILTIN: Record<string, keyof Translations["app"]["nav"]> = {
+  "/chat": "chat",
  "/sessions": "sessions",
  "/analytics": "analytics",
  "/logs": "logs",
--- a/web/src/lib/slashExec.ts
+++ b/web/src/lib/slashExec.ts
@ -0,0 +1,163 @@
+/**
+ * Slash command execution pipeline for the web chat.
+ *
+ * Mirrors the Ink TUI's createSlashHandler.ts:
+ *
+ *   1. Parse the command into `name` + `arg`.
+ *   2. Try `slash.exec` — covers every registry-backed command the terminal
+ *      UI knows about (/help, /resume, /compact, /model, …). Output is
+ *      rendered into the transcript.
+ *   3. If `slash.exec` errors (command rejected, unknown, or needs client
+ *      behaviour), fall back to `command.dispatch` which returns a typed
+ *      directive: `exec` | `plugin` | `alias` | `skill` | `send`.
+ *   4. Each directive is dispatched to the appropriate callback.
+ *
+ * Keeping the pipeline here (instead of inline in ChatPage) lets future
+ * clients (SwiftUI, Android) implement the same logic by reading the same
+ * contract.
+ */
+
+import type { GatewayClient } from "@/lib/gatewayClient";
+
+/** Result payload of the `slash.exec` request as consumed by `executeSlash`. */
+export interface SlashExecResponse {
+  /** Command output; when empty or absent, "/<name>: no output" is shown. */
+  output?: string;
+  /** Optional warning, rendered as a "warning: …" line ahead of the output. */
+  warning?: string;
+}
+
+/**
+ * Directive returned by the `command.dispatch` request, discriminated on
+ * `type`:
+ *
+ *   - `exec` / `plugin`: `output` is rendered into the transcript.
+ *   - `alias`: the pipeline is re-run with `target` as the command name.
+ *   - `skill`: the skill `name` is announced, then `message` is submitted.
+ *   - `send`: `message` is submitted to the agent.
+ */
+export type CommandDispatchResponse =
+  | { type: "exec" | "plugin"; output?: string }
+  | { type: "alias"; target: string }
+  | { type: "skill"; name: string; message?: string }
+  | { type: "send"; message: string };
+
+/** Host-supplied hooks through which `executeSlash` reports and submits. */
+export interface SlashExecCallbacks {
+  /** Render a transcript system message. */
+  sys(text: string): void;
+  /** Submit a user message to the agent (prompt.submit). */
+  send(message: string): Promise<void> | void;
+}
+
+/** Arguments accepted by `executeSlash`. */
+export interface SlashExecOptions {
+  /** Raw command including the leading slash (e.g. "/model opus-4.6"). */
+  command: string;
+  /** Session id. If empty the call is still issued — some commands are session-less. */
+  sessionId: string;
+  /** Gateway client used to issue the `slash.exec` / `command.dispatch` requests. */
+  gw: GatewayClient;
+  /** Output and submit hooks; see `SlashExecCallbacks`. */
+  callbacks: SlashExecCallbacks;
+}
+
+/**
+ * Outcome of `executeSlash`:
+ *
+ *   - `"done"`  — command output was rendered through `sys`.
+ *   - `"sent"`  — a message was handed to `send`.
+ *   - `"error"` — the command was empty or failed; a line explaining why was
+ *                 passed to `sys`.
+ */
+export type SlashExecResult = "done" | "sent" | "error";
+
+/**
+ * Maximum number of `alias` directives followed for a single command.  An
+ * alias that points back at itself (directly or through other aliases)
+ * would otherwise keep the pipeline issuing gateway requests forever.
+ */
+const MAX_ALIAS_HOPS = 8;
+
+/**
+ * Run a slash command. Returns the terminal state so callers can decide
+ * whether to clear the composer, queue retries, etc.
+ *
+ * Flow: `slash.exec` is tried first and its output rendered via `sys`.  If
+ * that request rejects for any reason, `command.dispatch` is asked for a
+ * directive which is then carried out (`exec`/`plugin` → render output,
+ * `alias` → re-run with the target name and the same argument,
+ * `skill`/`send` → submit the message via `send`).
+ *
+ * Never rejects for gateway failures: they are reported through `sys` and
+ * surface as the `"error"` result.
+ */
+export async function executeSlash(
+  options: SlashExecOptions,
+): Promise<SlashExecResult> {
+  return runSlash(options, 0);
+}
+
+/**
+ * Pipeline body behind `executeSlash`.  `aliasHops` counts how many `alias`
+ * directives have been followed so far for the current user command.
+ */
+async function runSlash(
+  { command, sessionId, gw, callbacks: { sys, send } }: SlashExecOptions,
+  aliasHops: number,
+): Promise<SlashExecResult> {
+  const { name, arg } = parseSlash(command);
+
+  if (!name) {
+    sys("empty slash command");
+    return "error";
+  }
+
+  // Primary dispatcher.
+  try {
+    const r = await gw.request<SlashExecResponse>("slash.exec", {
+      command: command.replace(/^\/+/, ""),
+      session_id: sessionId,
+    });
+    const body = r?.output || `/${name}: no output`;
+    sys(r?.warning ? `warning: ${r.warning}\n${body}` : body);
+    return "done";
+  } catch {
+    /* fall through to command.dispatch */
+  }
+
+  try {
+    const d = parseCommandDispatch(
+      await gw.request<unknown>("command.dispatch", {
+        name,
+        arg,
+        session_id: sessionId,
+      }),
+    );
+
+    if (!d) {
+      sys("error: invalid response: command.dispatch");
+      return "error";
+    }
+
+    switch (d.type) {
+      case "exec":
+      case "plugin":
+        sys(d.output ?? "(no output)");
+        return "done";
+
+      case "alias":
+        // Bound the chain so an alias cycle cannot loop forever.
+        if (aliasHops >= MAX_ALIAS_HOPS) {
+          sys(`error: /${name}: alias chain too deep`);
+          return "error";
+        }
+        return runSlash(
+          {
+            command: `/${d.target}${arg ? ` ${arg}` : ""}`,
+            sessionId,
+            gw,
+            callbacks: { sys, send },
+          },
+          aliasHops + 1,
+        );
+
+      case "skill":
+      case "send": {
+        const msg = d.message?.trim() ?? "";
+        if (!msg) {
+          sys(
+            `/${name}: ${d.type === "skill" ? "skill payload missing message" : "empty message"}`,
+          );
+          return "error";
+        }
+        if (d.type === "skill") sys(`⚡ loading skill: ${d.name}`);
+        await send(msg);
+        return "sent";
+      }
+    }
+  } catch (err) {
+    sys(`error: ${err instanceof Error ? err.message : String(err)}`);
+    return "error";
+  }
+}
+
+/**
+ * Split a raw slash command into its name and argument string.
+ *
+ * Leading slashes are dropped; `name` is the first whitespace-delimited
+ * token and `arg` is everything after it, trimmed.  The argument may span
+ * several lines.  Returns empty strings for both fields when there is no
+ * command name (e.g. "/" or "/ foo").
+ */
+export function parseSlash(command: string): { name: string; arg: string } {
+  // `[\s\S]*` rather than `.*`: `.` stops at line terminators, which made a
+  // multi-line argument fail the whole match and look like an empty command.
+  const m = command.replace(/^\/+/, "").match(/^(\S+)\s*([\s\S]*)$/);
+  return m ? { name: m[1], arg: m[2].trim() } : { name: "", arg: "" };
+}
+
+/**
+ * Validate an untyped `command.dispatch` payload.
+ *
+ * Returns the typed directive when `raw` is an object whose `type` is one of
+ * the known kinds and whose required string fields are present; returns
+ * `null` otherwise.  Optional fields that are not strings become `undefined`.
+ */
+function parseCommandDispatch(raw: unknown): CommandDispatchResponse | null {
+  if (typeof raw !== "object" || !raw) return null;
+
+  const fields = raw as Record<string, unknown>;
+  const optionalString = (value: unknown): string | undefined =>
+    typeof value === "string" ? value : undefined;
+
+  const kind = fields.type;
+
+  if (kind === "exec" || kind === "plugin") {
+    return { type: kind, output: optionalString(fields.output) };
+  }
+
+  if (kind === "alias") {
+    if (typeof fields.target !== "string") return null;
+    return { type: "alias", target: fields.target };
+  }
+
+  if (kind === "skill") {
+    if (typeof fields.name !== "string") return null;
+    return {
+      type: "skill",
+      name: fields.name,
+      message: optionalString(fields.message),
+    };
+  }
+
+  if (kind === "send") {
+    if (typeof fields.message !== "string") return null;
+    return { type: "send", message: fields.message };
+  }
+
+  return null;
+}
--- a/web/src/pages/ChatPage.tsx
+++ b/web/src/pages/ChatPage.tsx
@ -0,0 +1,743 @@
+/**
+ * ChatPage — embeds `hermes --tui` inside the dashboard.
+ *
+ *   <div host> (dashboard chrome)                                         .
+ *     └─ <div wrapper> (rounded, dark bg, padded — the "terminal window"  .
+ *         look that gives the page a distinct visual identity)            .
+ *         └─ @xterm/xterm Terminal (WebGL renderer, Unicode 11 widths)    .
+ *              │ onData      keystrokes → WebSocket → PTY master          .
+ *              │ onResize    terminal resize → `\x1b[RESIZE:cols;rows]`   .
+ *              │ write(data) PTY output bytes → VT100 parser              .
+ *              ▼                                                          .
+ *     WebSocket /api/pty?token=<session>                                  .
+ *          ▼                                                              .
+ *     FastAPI pty_ws  (hermes_cli/web_server.py)                          .
+ *          ▼                                                              .
+ *     POSIX PTY → `node ui-tui/dist/entry.js` → tui_gateway + AIAgent     .
+ */
+
+import { FitAddon } from "@xterm/addon-fit";
+import { Unicode11Addon } from "@xterm/addon-unicode11";
+import { WebLinksAddon } from "@xterm/addon-web-links";
+import { WebglAddon } from "@xterm/addon-webgl";
+import { Terminal } from "@xterm/xterm";
+import "@xterm/xterm/css/xterm.css";
+import { Typography } from "@nous-research/ui";
+import { cn } from "@/lib/utils";
+import { Copy, PanelRight, X } from "lucide-react";
+import { useCallback, useEffect, useMemo, useRef, useState } from "react";
+import { createPortal } from "react-dom";
+import { useSearchParams } from "react-router-dom";
+
+import { ChatSidebar } from "@/components/ChatSidebar";
+import { usePageHeader } from "@/contexts/usePageHeader";
+import { useI18n } from "@/i18n";
+
+/**
+ * Build the `/api/pty` WebSocket URL on the current page's host.
+ *
+ * The scheme is `wss:` when the page was loaded over `https:` and `ws:`
+ * otherwise.  `token` and `channel` are always sent as query parameters;
+ * `resume` is added only when it is a non-empty string.
+ */
+function buildWsUrl(
+  token: string,
+  resume: string | null,
+  channel: string,
+): string {
+  const { protocol, host } = window.location;
+  const scheme = protocol === "https:" ? "wss:" : "ws:";
+
+  const query = new URLSearchParams({ token, channel });
+  if (resume) {
+    query.set("resume", resume);
+  }
+
+  return `${scheme}//${host}/api/pty?${query.toString()}`;
+}
+
+// The channel id links this chat tab's PTY child (publisher) with its
+// sidebar (subscriber).  ChatPage creates one id per mount, so refreshing
+// the tab starts on a fresh channel; the previous PTY child terminates with
+// the old WS, and its channel auto-evicts when no subscribers remain.
+//
+// Prefers `crypto.randomUUID()`; where that is unavailable, falls back to a
+// `chat-<random>-<timestamp>` string built from base-36 fragments.
+function generateChannelId(): string {
+  const canUseRandomUuid =
+    typeof crypto !== "undefined" && "randomUUID" in crypto;
+
+  if (!canUseRandomUuid) {
+    const randomPart = Math.random().toString(36).slice(2);
+    const timePart = Date.now().toString(36);
+    return `chat-${randomPart}-${timePart}`;
+  }
+
+  return crypto.randomUUID();
+}
+
+// Colors for the terminal body.  Matches the dashboard's dark teal canvas
+// with cream foreground — we intentionally don't pick monokai or a loud
+// theme, because the TUI's skin engine already paints the content; the
+// terminal chrome just needs to sit quietly inside the dashboard.
+const TERMINAL_THEME = {
+  background: "#0d2626",
+  foreground: "#f0e6d2",
+  cursor: "#f0e6d2",
+  cursorAccent: "#0d2626",
+  selectionBackground: "#f0e6d244",
+};
+
+/**
+ * Width, in CSS pixels, used to pick the xterm font tier.
+ *
+ * The terminal host's own `clientWidth` wins whenever it is available.
+ * Chrome DevTools device mode often leaves `window.innerWidth` at the full
+ * desktop value while the *drawn* layout is phone-sized; trusting it picked
+ * desktop font sizes (~14px) that looked huge.
+ *
+ * When the host has no usable width, the smallest of `window.innerWidth`,
+ * the visual viewport width and the document element's `clientWidth` is
+ * used, never less than 1.  Without a `window` the result is 1280.
+ */
+function terminalTierWidthPx(host: HTMLElement | null): number {
+  if (typeof window === "undefined") {
+    return 1280;
+  }
+
+  const hostWidth = host?.clientWidth ?? 0;
+  if (hostWidth > 2) {
+    return Math.round(hostWidth);
+  }
+
+  const innerWidth = window.innerWidth;
+  const visualWidth = window.visualViewport?.width ?? innerWidth;
+  const docWidth = document.documentElement?.clientWidth ?? 0;
+  // A zero document width is treated as "unknown" and replaced by innerWidth.
+  const narrowest = Math.min(
+    innerWidth,
+    visualWidth,
+    docWidth > 0 ? docWidth : innerWidth,
+  );
+
+  return Math.max(1, Math.round(narrowest));
+}
+
+/**
+ * Map a layout width in CSS pixels to the xterm font size in pixels.
+ * Narrower layouts get smaller fonts; 1024px and wider get 14.
+ */
+function terminalFontSizeForWidth(layoutWidthPx: number): number {
+  // [exclusive upper width bound, font size], narrowest tier first.
+  const tiers: ReadonlyArray<readonly [number, number]> = [
+    [300, 7],
+    [360, 8],
+    [420, 9],
+    [520, 10],
+    [720, 11],
+    [1024, 12],
+  ];
+
+  for (const [upperBound, fontSize] of tiers) {
+    if (layoutWidthPx < upperBound) return fontSize;
+  }
+  return 14;
+}
+
+/** Line-height multiplier for a layout width: 1.02 below 1024px, else 1.15. */
+function terminalLineHeightForWidth(layoutWidthPx: number): number {
+  if (layoutWidthPx < 1024) {
+    return 1.02;
+  }
+  return 1.15;
+}
+
+/**
+ * Dashboard chat page.
+ *
+ * Mounts an xterm.js `Terminal` in the host element and connects it to the
+ * `/api/pty` WebSocket: terminal input and resize notifications go out,
+ * PTY output is written back into the terminal.  Around the terminal it
+ * renders a warning banner, a "copy last response" button and the
+ * `ChatSidebar` — inline on wide layouts, as a portaled slide-over sheet on
+ * narrow ones (toggled from a page-header button).
+ *
+ * When `window.__HERMES_SESSION_TOKEN__` is missing, only the banner is
+ * shown and no terminal or socket is created.
+ */
+export default function ChatPage() {
+  const hostRef = useRef<HTMLDivElement | null>(null);
+  const termRef = useRef<Terminal | null>(null);
+  const fitRef = useRef<FitAddon | null>(null);
+  const wsRef = useRef<WebSocket | null>(null);
+  const [searchParams] = useSearchParams();
+  // Lazy-init: the missing-token check happens at construction so the effect
+  // body doesn't have to setState (React 19's set-state-in-effect rule).
+  const [banner, setBanner] = useState<string | null>(() =>
+    typeof window !== "undefined" && !window.__HERMES_SESSION_TOKEN__
+      ? "Session token unavailable. Open this page through `hermes dashboard`, not directly."
+      : null,
+  );
+  const [copyState, setCopyState] = useState<"idle" | "copied">("idle");
+  const copyResetRef = useRef<ReturnType<typeof setTimeout> | null>(null);
+  const [mobilePanelOpen, setMobilePanelOpen] = useState(false);
+  const { setEnd } = usePageHeader();
+  const { t } = useI18n();
+  const closeMobilePanel = useCallback(() => setMobilePanelOpen(false), []);
+  const modelToolsLabel = useMemo(
+    () => `${t.app.modelToolsSheetTitle} ${t.app.modelToolsSheetSubtitle}`,
+    [t.app.modelToolsSheetSubtitle, t.app.modelToolsSheetTitle],
+  );
+  const [portalRoot] = useState<HTMLElement | null>(() =>
+    typeof document !== "undefined" ? document.body : null,
+  );
+  const [narrow, setNarrow] = useState(() =>
+    typeof window !== "undefined"
+      ? window.matchMedia("(max-width: 1023px)").matches
+      : false,
+  );
+
+  const resumeRef = useRef<string | null>(searchParams.get("resume"));
+  const channel = useMemo(() => generateChannelId(), []);
+
+  useEffect(() => {
+    const mql = window.matchMedia("(max-width: 1023px)");
+    const sync = () => setNarrow(mql.matches);
+    sync();
+    mql.addEventListener("change", sync);
+    return () => mql.removeEventListener("change", sync);
+  }, []);
+
+  useEffect(() => {
+    if (!mobilePanelOpen) return;
+    const onKey = (e: KeyboardEvent) => {
+      if (e.key === "Escape") closeMobilePanel();
+    };
+    document.addEventListener("keydown", onKey);
+    const prevOverflow = document.body.style.overflow;
+    document.body.style.overflow = "hidden";
+    return () => {
+      document.removeEventListener("keydown", onKey);
+      document.body.style.overflow = prevOverflow;
+    };
+  }, [mobilePanelOpen, closeMobilePanel]);
+
+  useEffect(() => {
+    const mql = window.matchMedia("(min-width: 1024px)");
+    const onChange = (e: MediaQueryListEvent) => {
+      if (e.matches) setMobilePanelOpen(false);
+    };
+    mql.addEventListener("change", onChange);
+    return () => mql.removeEventListener("change", onChange);
+  }, []);
+
+  useEffect(() => {
+    if (!narrow) {
+      setEnd(null);
+      return;
+    }
+    setEnd(
+      <button
+        type="button"
+        onClick={() => setMobilePanelOpen(true)}
+        className={cn(
+          "inline-flex items-center gap-1.5 rounded border border-current/20",
+          "px-2 py-1 text-[0.65rem] font-medium tracking-wide normal-case",
+          "text-midground/80 hover:text-midground hover:bg-midground/5",
+          "focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-midground",
+          "shrink-0 cursor-pointer",
+        )}
+        aria-expanded={mobilePanelOpen}
+        aria-controls="chat-side-panel"
+      >
+        <PanelRight className="h-3 w-3 shrink-0" />
+        {modelToolsLabel}
+      </button>,
+    );
+    return () => setEnd(null);
+  }, [narrow, mobilePanelOpen, modelToolsLabel, setEnd]);
+
+  const handleCopyLast = () => {
+    const ws = wsRef.current;
+    if (!ws || ws.readyState !== WebSocket.OPEN) return;
+    // Send the slash as a burst, wait long enough for Ink's tokenizer to
+    // emit a keypress event for each character (not coalesce them into a
+    // paste), then send Return as its own event.  The timing here is
+    // empirical — 100ms is safely past Node's default stdin coalescing
+    // window and well inside UI responsiveness.
+    ws.send("/copy");
+    setTimeout(() => {
+      const s = wsRef.current;
+      if (s && s.readyState === WebSocket.OPEN) s.send("\r");
+    }, 100);
+    setCopyState("copied");
+    if (copyResetRef.current) clearTimeout(copyResetRef.current);
+    copyResetRef.current = setTimeout(() => setCopyState("idle"), 1500);
+    termRef.current?.focus();
+  };
+
+  useEffect(() => {
+    const host = hostRef.current;
+    if (!host) return;
+
+    const token = window.__HERMES_SESSION_TOKEN__;
+    // Banner already initialised above; just bail before wiring xterm/WS.
+    if (!token) {
+      return;
+    }
+
+    const tierW0 = terminalTierWidthPx(host);
+    const term = new Terminal({
+      allowProposedApi: true,
+      cursorBlink: true,
+      fontFamily:
+        "'JetBrains Mono', 'Cascadia Mono', 'Fira Code', 'MesloLGS NF', 'Source Code Pro', Menlo, Consolas, 'DejaVu Sans Mono', monospace",
+      fontSize: terminalFontSizeForWidth(tierW0),
+      lineHeight: terminalLineHeightForWidth(tierW0),
+      letterSpacing: 0,
+      fontWeight: "400",
+      fontWeightBold: "700",
+      macOptionIsMeta: true,
+      scrollback: 0,
+      theme: TERMINAL_THEME,
+    });
+    termRef.current = term;
+
+    // --- Clipboard integration ---------------------------------------
+    //
+    // Three independent paths all route to the system clipboard:
+    //
+    //   1. **Selection → Ctrl+C (or Cmd+C on macOS).**  Ink's own handler
+    //      in useInputHandlers.ts turns Ctrl+C into a copy when the
+    //      terminal has a selection, then emits an OSC 52 escape.  Our
+    //      OSC 52 handler below decodes that escape and writes to the
+    //      browser clipboard — so the flow works just like it does in
+    //      `hermes --tui`.
+    //
+    //   2. **Ctrl/Cmd+Shift+C.**  Belt-and-suspenders shortcut that
+    //      operates directly on xterm's selection, useful if the TUI
+    //      ever stops listening (e.g. overlays / pickers) or if the user
+    //      has selected with the mouse outside of Ink's selection model.
+    //
+    //   3. **Ctrl/Cmd+Shift+V.**  Reads the system clipboard and feeds
+    //      it to the terminal as keyboard input.  xterm's paste() wraps
+    //      it with bracketed-paste if the host has that mode enabled.
+    //
+    // OSC 52 reads (terminal asking to read the clipboard) are not
+    // supported — that would let any content the TUI renders exfiltrate
+    // the user's clipboard.
+    term.parser.registerOscHandler(52, (data) => {
+      // Format: "<targets>;<base64 | '?'>"
+      const semi = data.indexOf(";");
+      if (semi < 0) return false;
+      const payload = data.slice(semi + 1);
+      if (payload === "?" || payload === "") return false; // read/clear — ignore
+      try {
+        // atob returns a binary string (one byte per char); we need UTF-8
+        // decode so multi-byte codepoints (≥, →, emoji, CJK) round-trip
+        // correctly.  Without this step, the three UTF-8 bytes of `≥`
+        // would land in the clipboard as the three separate Latin-1
+        // characters `â‰¥`.
+        const binary = atob(payload);
+        const bytes = Uint8Array.from(binary, (c) => c.charCodeAt(0));
+        const text = new TextDecoder("utf-8").decode(bytes);
+        navigator.clipboard.writeText(text).catch(() => {});
+      } catch {
+        // Malformed base64 — silently drop.
+      }
+      return true;
+    });
+
+    const isMac =
+      typeof navigator !== "undefined" && /Mac/i.test(navigator.platform);
+
+    term.attachCustomKeyEventHandler((ev) => {
+      if (ev.type !== "keydown") return true;
+
+      const copyModifier = isMac ? ev.metaKey : ev.ctrlKey && ev.shiftKey;
+      const pasteModifier = isMac ? ev.metaKey : ev.ctrlKey && ev.shiftKey;
+
+      if (copyModifier && ev.key.toLowerCase() === "c") {
+        const sel = term.getSelection();
+        if (sel) {
+          navigator.clipboard.writeText(sel).catch(() => {});
+          ev.preventDefault();
+          return false;
+        }
+      }
+
+      if (pasteModifier && ev.key.toLowerCase() === "v") {
+        navigator.clipboard
+          .readText()
+          .then((text) => {
+            if (text) term.paste(text);
+          })
+          .catch(() => {});
+        ev.preventDefault();
+        return false;
+      }
+
+      return true;
+    });
+
+    const fit = new FitAddon();
+    fitRef.current = fit;
+    term.loadAddon(fit);
+
+    const unicode11 = new Unicode11Addon();
+    term.loadAddon(unicode11);
+    term.unicode.activeVersion = "11";
+
+    term.loadAddon(new WebLinksAddon());
+
+    term.open(host);
+
+    // WebGL draws from a texture atlas sized with device pixels. On phones and
+    // in DevTools device mode that often produces *visually* much larger cells
+    // than `fontSize` suggests — users see "huge" text even at 7–9px settings.
+    // The canvas/DOM renderer tracks `fontSize` faithfully; use it for narrow
+    // hosts.  Wide layouts still get WebGL for crisp box-drawing.
+    const useWebgl = terminalTierWidthPx(host) >= 768;
+    if (useWebgl) {
+      try {
+        const webgl = new WebglAddon();
+        webgl.onContextLoss(() => webgl.dispose());
+        term.loadAddon(webgl);
+      } catch (err) {
+        console.warn(
+          "[hermes-chat] WebGL renderer unavailable; falling back to default",
+          err,
+        );
+      }
+    }
+
+    // Initial fit + resize observer.  fit.fit() reads the container's
+    // current bounding box and resizes the terminal grid to match.
+    //
+    // The subtle bit: the dashboard has CSS transitions on the container
+    // (backdrop fade-in, rounded corners settling as fonts load).  If we
+    // call fit() at mount time, the bounding box we measure is often 1-2
+    // cell widths off from the final size.  ResizeObserver *does* fire
+    // when the container settles, but if the pixel delta happens to be
+    // smaller than one cell's width, fit() computes the same integer
+    // (cols, rows) as before and doesn't emit onResize — so the PTY
+    // never learns the final size.  Users see truncated long lines until
+    // they resize the browser window.
+    //
+    // We force one extra fit + explicit RESIZE send after two animation
+    // frames.  rAF→rAF guarantees one layout commit between the two
+    // callbacks, giving CSS transitions and font metrics time to finalize
+    // before we take the authoritative measurement.
+    let hostSyncRaf = 0;
+    const scheduleHostSync = () => {
+      if (hostSyncRaf) return;
+      hostSyncRaf = requestAnimationFrame(() => {
+        hostSyncRaf = 0;
+        syncTerminalMetrics();
+      });
+    };
+
+    let metricsDebounce: ReturnType<typeof setTimeout> | null = null;
+    const syncTerminalMetrics = () => {
+      const w = terminalTierWidthPx(host);
+      const nextSize = terminalFontSizeForWidth(w);
+      const nextLh = terminalLineHeightForWidth(w);
+      const fontChanged =
+        term.options.fontSize !== nextSize ||
+        term.options.lineHeight !== nextLh;
+      if (fontChanged) {
+        term.options.fontSize = nextSize;
+        term.options.lineHeight = nextLh;
+      }
+      try {
+        fit.fit();
+      } catch {
+        return;
+      }
+      if (fontChanged && term.rows > 0) {
+        try {
+          term.refresh(0, term.rows - 1);
+        } catch {
+          /* ignore */
+        }
+      }
+      if (
+        fontChanged &&
+        wsRef.current &&
+        wsRef.current.readyState === WebSocket.OPEN
+      ) {
+        wsRef.current.send(`\x1b[RESIZE:${term.cols};${term.rows}]`);
+      }
+    };
+
+    const scheduleSyncTerminalMetrics = () => {
+      if (metricsDebounce) clearTimeout(metricsDebounce);
+      metricsDebounce = setTimeout(() => {
+        metricsDebounce = null;
+        syncTerminalMetrics();
+      }, 60);
+    };
+
+    const ro = new ResizeObserver(() => scheduleHostSync());
+    ro.observe(host);
+
+    window.addEventListener("resize", scheduleSyncTerminalMetrics);
+    window.visualViewport?.addEventListener("resize", scheduleSyncTerminalMetrics);
+    window.visualViewport?.addEventListener("scroll", scheduleSyncTerminalMetrics);
+    scheduleHostSync();
+    requestAnimationFrame(() => scheduleHostSync());
+
+    // Double-rAF authoritative fit.  On the second frame the layout has
+    // committed at least once since mount; fit.fit() then reads the
+    // stable container size.  We always send a RESIZE escape afterwards
+    // (even if fit's cols/rows didn't change, so the PTY has the same
+    // dims registered as our JS state — prevents a drift where Ink
+    // thinks the terminal is one col bigger than what's on screen).
+    let settleRaf1 = 0;
+    let settleRaf2 = 0;
+    settleRaf1 = requestAnimationFrame(() => {
+      settleRaf1 = 0;
+      settleRaf2 = requestAnimationFrame(() => {
+        settleRaf2 = 0;
+        syncTerminalMetrics();
+        // syncTerminalMetrics() only reports the size when the font tier
+        // changed, so the unconditional RESIZE promised above is sent
+        // here.  If the socket is not open yet, onopen sends the dims.
+        const sock = wsRef.current;
+        if (sock && sock.readyState === WebSocket.OPEN) {
+          sock.send(`\x1b[RESIZE:${term.cols};${term.rows}]`);
+        }
+      });
+    });
+
+    // WebSocket
+    const url = buildWsUrl(token, resumeRef.current, channel);
+    const ws = new WebSocket(url);
+    ws.binaryType = "arraybuffer";
+    wsRef.current = ws;
+    // Suppress banner/terminal side-effects when cleanup() calls `ws.close()`
+    // (React StrictMode remount, route change) so we never write to a
+    // disposed xterm or setState on an unmounted tree.
+    let unmounting = false;
+
+    ws.onopen = () => {
+      setBanner(null);
+      // Send the initial RESIZE immediately so Ink has *a* size to lay
+      // out against on its first paint.  The double-rAF block above will
+      // follow up with the authoritative measurement — at worst Ink
+      // reflows once after the PTY boots, which is imperceptible.
+      ws.send(`\x1b[RESIZE:${term.cols};${term.rows}]`);
+    };
+
+    ws.onmessage = (ev) => {
+      if (typeof ev.data === "string") {
+        term.write(ev.data);
+      } else {
+        term.write(new Uint8Array(ev.data as ArrayBuffer));
+      }
+    };
+
+    ws.onclose = (ev) => {
+      // The close event is delivered asynchronously.  After a StrictMode
+      // remount the ref may already hold the *next* effect's socket, so
+      // only clear it when it still points at this one.
+      if (wsRef.current === ws) wsRef.current = null;
+      if (unmounting) {
+        return;
+      }
+      if (ev.code === 4401) {
+        setBanner("Auth failed. Reload the page to refresh the session token.");
+        return;
+      }
+      if (ev.code === 4403) {
+        setBanner("Chat is only reachable from localhost.");
+        return;
+      }
+      if (ev.code === 1011) {
+        // Server already wrote an ANSI error frame.
+        return;
+      }
+      term.write("\r\n\x1b[90m[session ended]\x1b[0m\r\n");
+    };
+
+    // Keystrokes + mouse events → PTY, with cell-level dedup for motion.
+    //
+    // Ink enables `\x1b[?1003h` (any-motion tracking), which asks the
+    // terminal to report every mouse-move as an SGR mouse event even with
+    // no button held.  xterm.js happily emits one report per pixel of
+    // mouse motion; without deduping, a casual mouse-over floods Ink with
+    // hundreds of redraw-triggering reports and the UI goes laggy
+    // (scrolling stutters, clicks land on stale positions by the time
+    // Ink finishes processing the motion backlog).
+    //
+    // We keep track of the last cell we reported a motion for.  Press,
+    // release, and wheel events always pass through; motion events only
+    // pass through if the cell changed.  Parsing is cheap — SGR reports
+    // are short literal strings.
+    // eslint-disable-next-line no-control-regex -- intentional ESC byte in xterm SGR mouse report parser
+    const SGR_MOUSE_RE = /^\x1b\[<(\d+);(\d+);(\d+)([Mm])$/;
+    let lastMotionCell = { col: -1, row: -1 };
+    let lastMotionCb = -1;
+    const onDataDisposable = term.onData((data) => {
+      if (ws.readyState !== WebSocket.OPEN) return;
+
+      const m = SGR_MOUSE_RE.exec(data);
+      if (m) {
+        const cb = parseInt(m[1], 10);
+        const col = parseInt(m[2], 10);
+        const row = parseInt(m[3], 10);
+        const released = m[4] === "m";
+        // Motion events have bit 0x20 (32) set in the button code.
+        // Wheel events have bit 0x40 (64); always forward wheel.
+        const isMotion = (cb & 0x20) !== 0 && (cb & 0x40) === 0;
+        const isWheel = (cb & 0x40) !== 0;
+        if (isMotion && !isWheel && !released) {
+          if (
+            col === lastMotionCell.col &&
+            row === lastMotionCell.row &&
+            cb === lastMotionCb
+          ) {
+            return; // same cell + same button state; skip redundant report
+          }
+          lastMotionCell = { col, row };
+          lastMotionCb = cb;
+        } else {
+          // Non-motion event (press, release, wheel) — reset dedup state
+          // so the next motion after this always reports.
+          lastMotionCell = { col: -1, row: -1 };
+          lastMotionCb = -1;
+        }
+      }
+
+      ws.send(data);
+    });
+
+    const onResizeDisposable = term.onResize(({ cols, rows }) => {
+      if (ws.readyState === WebSocket.OPEN) {
+        ws.send(`\x1b[RESIZE:${cols};${rows}]`);
+      }
+    });
+
+    term.focus();
+
+    return () => {
+      unmounting = true;
+      onDataDisposable.dispose();
+      onResizeDisposable.dispose();
+      if (metricsDebounce) clearTimeout(metricsDebounce);
+      window.removeEventListener("resize", scheduleSyncTerminalMetrics);
+      window.visualViewport?.removeEventListener(
+        "resize",
+        scheduleSyncTerminalMetrics,
+      );
+      window.visualViewport?.removeEventListener(
+        "scroll",
+        scheduleSyncTerminalMetrics,
+      );
+      ro.disconnect();
+      if (hostSyncRaf) cancelAnimationFrame(hostSyncRaf);
+      if (settleRaf1) cancelAnimationFrame(settleRaf1);
+      if (settleRaf2) cancelAnimationFrame(settleRaf2);
+      ws.close();
+      // Same ownership rule as onclose: never clear another effect's socket.
+      if (wsRef.current === ws) wsRef.current = null;
+      term.dispose();
+      termRef.current = null;
+      fitRef.current = null;
+      if (copyResetRef.current) {
+        clearTimeout(copyResetRef.current);
+        copyResetRef.current = null;
+      }
+    };
+  }, [channel]);
+
+  // Layout:
+  //   outer flex column — sits inside the dashboard's content area
+  //   row split — terminal pane (flex-1) + sidebar (fixed width, lg+)
+  //   terminal wrapper — rounded, dark, padded — the "terminal window"
+  //   floating copy button — bottom-right corner, transparent with a
+  //     subtle border; stays out of the way until hovered.  Sends
+  //     `/copy\n` to Ink, which emits OSC 52 → our clipboard handler.
+  //   sidebar — ChatSidebar opens its own JSON-RPC sidecar; renders
+  //     model badge, tool-call list, model picker. Best-effort: if the
+  //     sidecar fails to connect the terminal pane keeps working.
+  //
+  // `normal-case` opts out of the dashboard's global `uppercase` rule on
+  // the root `<div>` in App.tsx — terminal output must preserve case.
+  //
+  // Mobile model/tools sheet is portaled to `document.body` so it stacks
+  // above the app sidebar (`z-50`) and mobile chrome (`z-40`).  The main
+  // dashboard column uses `relative z-2`, which traps `position:fixed`
+  // descendants below those layers (see Toast.tsx).
+  const mobileModelToolsPortal =
+    narrow &&
+    portalRoot &&
+    createPortal(
+      <>
+        {mobilePanelOpen && (
+          <button
+            type="button"
+            aria-label={t.app.closeModelTools}
+            onClick={closeMobilePanel}
+            className={cn(
+              "fixed inset-0 z-[55]",
+              "bg-black/60 backdrop-blur-sm cursor-pointer",
+            )}
+          />
+        )}
+
+        <div
+          id="chat-side-panel"
+          role="complementary"
+          aria-label={modelToolsLabel}
+          className={cn(
+            "font-mondwest fixed top-0 right-0 z-[60] flex h-dvh max-h-dvh w-64 min-w-0 flex-col antialiased",
+            "border-l border-current/20 text-midground",
+            "bg-background-base/95 backdrop-blur-sm",
+            "transition-transform duration-200 ease-out",
+            "[background:var(--component-sidebar-background)]",
+            "[clip-path:var(--component-sidebar-clip-path)]",
+            "[border-image:var(--component-sidebar-border-image)]",
+            mobilePanelOpen
+              ? "translate-x-0"
+              : "pointer-events-none translate-x-full",
+          )}
+        >
+          <div
+            className={cn(
+              "flex h-14 shrink-0 items-center justify-between gap-2 border-b border-current/20 px-5",
+            )}
+          >
+            <Typography
+              className="font-bold text-[1.125rem] leading-[0.95] tracking-[0.0525rem] text-midground"
+              style={{ mixBlendMode: "plus-lighter" }}
+            >
+              {t.app.modelToolsSheetTitle}
+              <br />
+              {t.app.modelToolsSheetSubtitle}
+            </Typography>
+
+            <button
+              type="button"
+              onClick={closeMobilePanel}
+              aria-label={t.app.closeModelTools}
+              className={cn(
+                "inline-flex h-7 w-7 items-center justify-center",
+                "text-midground/70 hover:text-midground transition-colors cursor-pointer",
+                "focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-midground",
+              )}
+            >
+              <X className="h-4 w-4" />
+            </button>
+          </div>
+
+          <div
+            className={cn(
+              "min-h-0 flex-1 overflow-y-auto overflow-x-hidden",
+              "border-t border-current/10",
+            )}
+          >
+            <ChatSidebar channel={channel} />
+          </div>
+        </div>
+      </>,
+      portalRoot,
+    );
+
+  return (
+    <div className="flex min-h-0 flex-1 flex-col gap-2 normal-case">
+      {mobileModelToolsPortal}
+
+      {banner && (
+        <div className="border border-warning/50 bg-warning/10 text-warning px-3 py-2 text-xs tracking-wide">
+          {banner}
+        </div>
+      )}
+
+      <div className="flex min-h-0 flex-1 flex-col gap-2 lg:flex-row lg:gap-3">
+        <div
+          className={cn(
+            "relative flex min-h-0 min-w-0 flex-1 flex-col overflow-hidden rounded-lg",
+            "p-2 sm:p-3",
+          )}
+          style={{
+            backgroundColor: TERMINAL_THEME.background,
+            boxShadow: "0 8px 32px rgba(0, 0, 0, 0.4)",
+          }}
+        >
+          <div
+            ref={hostRef}
+            className="hermes-chat-xterm-host min-h-0 min-w-0 flex-1"
+          />
+
+          <button
+            type="button"
+            onClick={handleCopyLast}
+            title="Copy last assistant response as raw markdown"
+            aria-label="Copy last assistant response"
+            className={cn(
+              "absolute z-10 flex items-center gap-1.5",
+              "rounded border border-current/30",
+              "bg-black/20 backdrop-blur-sm",
+              "opacity-60 hover:opacity-100 hover:border-current/60",
+              "transition-opacity duration-150",
+              "focus-visible:opacity-100 focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-current",
+              "cursor-pointer",
+              "bottom-2 right-2 px-2 py-1 text-[0.65rem] sm:bottom-3 sm:right-3 sm:px-2.5 sm:py-1.5 sm:text-xs",
+              "lg:bottom-4 lg:right-4",
+            )}
+            style={{ color: TERMINAL_THEME.foreground }}
+          >
+            <Copy className="h-3 w-3 shrink-0" />
+            <span className="hidden min-[400px]:inline tracking-wide">
+              {copyState === "copied" ? "copied" : "copy last response"}
+            </span>
+          </button>
+        </div>
+
+        {!narrow && (
+          <div
+            id="chat-side-panel"
+            role="complementary"
+            aria-label={modelToolsLabel}
+            className="flex min-h-0 shrink-0 flex-col lg:h-full lg:w-80"
+          >
+            <div className="min-h-0 flex-1 overflow-y-auto overflow-x-hidden">
+              <ChatSidebar channel={channel} />
+            </div>
+          </div>
+        )}
+      </div>
+    </div>
+  );
+}
+
+// Ambient typing for the optional `window.__HERMES_SESSION_TOKEN__` global.
+// ChatPage reads it and sends it as the `token` query parameter of the
+// `/api/pty` WebSocket URL; when it is missing the page shows a banner and
+// does not connect.
+declare global {
+  interface Window {
+    __HERMES_SESSION_TOKEN__?: string;
+  }
+}
--- a/web/src/pages/DocsPage.tsx
+++ b/web/src/pages/DocsPage.tsx
@ -35,7 +35,7 @@ export default function DocsPage() {
    <div
      className={cn(
        "flex min-h-0 w-full min-w-0 flex-1 flex-col",
-        "-mx-3 sm:-mx-6",
+        "pt-1 sm:pt-2",
      )}
    >
      <iframe
--- a/web/src/pages/SessionsPage.tsx
+++ b/web/src/pages/SessionsPage.tsx
@ -1,4 +1,11 @@
-import { useEffect, useLayoutEffect, useState, useCallback, useRef } from "react";
+import {
+  useEffect,
+  useLayoutEffect,
+  useState,
+  useCallback,
+  useRef,
+} from "react";
+import { useNavigate } from "react-router-dom";
 import {
  AlertTriangle,
  CheckCircle2,
@ -16,6 +23,7 @@ import {
  MessageCircle,
  Hash,
  X,
+  Play,
 } from "lucide-react";
 import { api } from "@/lib/api";
 import type {
@ -38,6 +46,7 @@ import { useSystemActions } from "@/contexts/useSystemActions";
 import { useToast } from "@/hooks/useToast";
 import { useI18n } from "@/i18n";
 import { usePageHeader } from "@/contexts/usePageHeader";
+import { isDashboardEmbeddedChatEnabled } from "@/lib/dashboard-flags";

 const SOURCE_CONFIG: Record<string, { icon: typeof Terminal; color: string }> =
  {
@ -250,6 +259,7 @@ function SessionRow({
  isExpanded,
  onToggle,
  onDelete,
+  resumeInChatEnabled,
 }: {
  session: SessionInfo;
  snippet?: string;
@ -257,11 +267,13 @@ function SessionRow({
  isExpanded: boolean;
  onToggle: () => void;
  onDelete: () => void;
+  resumeInChatEnabled: boolean;
 }) {
  const [messages, setMessages] = useState<SessionMessage[] | null>(null);
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const { t } = useI18n();
+  const navigate = useNavigate();

  useEffect(() => {
    if (isExpanded && messages === null && !loading) {
@ -341,6 +353,21 @@ function SessionRow({
          <Badge variant="outline" className="text-[10px]">
            {session.source ?? "local"}
          </Badge>
+          {resumeInChatEnabled && (
+            <Button
+              variant="ghost"
+              size="icon"
+              className="h-7 w-7 text-muted-foreground hover:text-success"
+              aria-label={t.sessions.resumeInChat}
+              title={t.sessions.resumeInChat}
+              onClick={(e) => {
+                e.stopPropagation();
+                navigate(`/chat?resume=${encodeURIComponent(session.id)}`);
+              }}
+            >
+              <Play className="h-3.5 w-3.5" />
+            </Button>
+          )}
          <Button
            variant="ghost"
            size="icon"
@ -400,6 +427,7 @@ export default function SessionsPage() {
  const { t } = useI18n();
  const { setAfterTitle, setEnd } = usePageHeader();
  const { activeAction, actionStatus, dismissLog } = useSystemActions();
+  const resumeInChatEnabled = isDashboardEmbeddedChatEnabled();

  useLayoutEffect(() => {
    if (loading) {
@ -764,6 +792,7 @@ export default function SessionsPage() {
                  setExpandedId((prev) => (prev === s.id ? null : s.id))
                }
                onDelete={() => sessionDelete.requestDelete(s.id)}
+                resumeInChatEnabled={resumeInChatEnabled}
              />
            ))}
          </div>
--- a/web/vite.config.ts
+++ b/web/vite.config.ts
@ -17,6 +17,10 @@ const BACKEND = process.env.HERMES_DASHBOARD_URL ?? "http://127.0.0.1:9119";
 */
 function hermesDevToken(): Plugin {
  const TOKEN_RE = /window\.__HERMES_SESSION_TOKEN__\s*=\s*"([^"]+)"/;
+  const EMBEDDED_RE =
+    /window\.__HERMES_DASHBOARD_EMBEDDED_CHAT__\s*=\s*(true|false)/;
+  const LEGACY_TUI_RE =
+    /window\.__HERMES_DASHBOARD_TUI__\s*=\s*(true|false)/;

  return {
    name: "hermes:dev-session-token",
@ -33,11 +37,20 @@ function hermesDevToken(): Plugin {
          );
          return;
        }
+        const embeddedMatch = html.match(EMBEDDED_RE);
+        const legacyMatch = html.match(LEGACY_TUI_RE);
+        const embeddedJs = embeddedMatch
+          ? embeddedMatch[1]
+          : legacyMatch
+            ? legacyMatch[1]
+            : "false";
        return [
          {
            tag: "script",
            injectTo: "head",
-            children: `window.__HERMES_SESSION_TOKEN__="${match[1]}";`,
+            children:
+              `window.__HERMES_SESSION_TOKEN__="${match[1]}";` +
+              `window.__HERMES_DASHBOARD_EMBEDDED_CHAT__=${embeddedJs};`,
          },
        ];
      } catch (err) {
@ -64,7 +77,10 @@ export default defineConfig({
  },
  server: {
    proxy: {
-      "/api": BACKEND,
+      "/api": {
+        target: BACKEND,
+        ws: true,
+      },
      // Same host as `hermes dashboard` must serve these; Vite has no
      // dashboard-plugins/* files, so without this, plugin scripts 404
      // or receive index.html in dev.
--- a/website/docs/developer-guide/architecture.md
+++ b/website/docs/developer-guide/architecture.md
@ -35,7 +35,7 @@ This page is the top-level map of Hermes Agent internals. Use it to orient yours
 │  │              │  │ codex_resp.  │  │ 47 tools     │               │
 │  │              │  │ anthropic    │  │ 19 toolsets  │               │
 │  └──────────────┘  └──────────────┘  └──────────────┘               │
-└─────────────────────────────────────────────────────────────────────┘
+└─────────┴─────────────────┴─────────────────┴───────────────────────┘
           │                                    │
           ▼                                    ▼
 ┌───────────────────┐              ┌──────────────────────┐
--- a/website/docs/developer-guide/gateway-internals.md
+++ b/website/docs/developer-guide/gateway-internals.md
@ -46,7 +46,7 @@ The messaging gateway is the long-running process that connects Hermes to 14+ ex
 │                     ▼                           │
 │                 SessionStore                    │
 │              (SQLite persistence)               │
-└─────────────────────────────────────────────────┘
+└───────┴─────────────┴─────────────┴─────────────┘
 ```

 ## Message Flow
--- a/website/docs/reference/skills-catalog.md
+++ b/website/docs/reference/skills-catalog.md
@ -101,6 +101,7 @@ If a skill is missing from this list but present in the repo, the catalog is reg
 | [`gif-search`](/docs/user-guide/skills/bundled/media/media-gif-search) | Search and download GIFs from Tenor using curl. No dependencies beyond curl and jq. Useful for finding reaction GIFs, creating visual content, and sending GIFs in chat. | `media/gif-search` |
 | [`heartmula`](/docs/user-guide/skills/bundled/media/media-heartmula) | Set up and run HeartMuLa, the open-source music generation model family (Suno-like). Generates full songs from lyrics + tags with multilingual support. | `media/heartmula` |
 | [`songsee`](/docs/user-guide/skills/bundled/media/media-songsee) | Generate spectrograms and audio feature visualizations (mel, chroma, MFCC, tempogram, etc.) from audio files via CLI. Useful for audio analysis, music production debugging, and visual documentation. | `media/songsee` |
+| [`spotify`](/docs/user-guide/skills/bundled/media/media-spotify) | Control Spotify — play music, search the catalog, manage playlists and library, inspect devices and playback state. Loads when the user asks to play/pause/queue music, search tracks/albums/artists, manage playlists, or check what's playi... | `media/spotify` |
 | [`youtube-content`](/docs/user-guide/skills/bundled/media/media-youtube-content) | Fetch YouTube video transcripts and transform them into structured content (chapters, summaries, threads, blog posts). Use when the user shares a YouTube URL or video link, asks to summarize a video, requests a transcript, or wants to ex... | `media/youtube-content` |

 ## mlops
--- a/website/docs/user-guide/features/web-dashboard.md
+++ b/website/docs/user-guide/features/web-dashboard.md
@ -37,13 +37,13 @@ hermes dashboard --no-open

 ## Prerequisites

-The web dashboard requires FastAPI and Uvicorn. Install them with:
+The default `hermes-agent` install does not ship the HTTP stack or PTY helper — those are optional extras. The **web dashboard** needs FastAPI and Uvicorn (`web` extra). The **Chat** tab also needs `ptyprocess` to spawn the embedded TUI behind a pseudo-terminal (`pty` extra on POSIX). Install both with:

 ```bash
-pip install hermes-agent[web]
+pip install 'hermes-agent[web,pty]'
 ```

-If you installed with `pip install hermes-agent[all]`, the web dependencies are already included.
+The `web` extra pulls in FastAPI/Uvicorn; `pty` pulls in `ptyprocess` (POSIX) or `pywinpty` (native Windows — note that the embedded TUI itself still requires WSL). `pip install 'hermes-agent[all]'` includes both extras and is the easiest path if you also want messaging, voice, etc. (The quotes keep shells such as zsh from treating the square brackets as a glob pattern.)

 When you run `hermes dashboard` without the dependencies, it will tell you what to install. If the frontend hasn't been built yet and `npm` is available, it builds automatically on first launch.

@ -60,6 +60,28 @@ The landing page shows a live overview of your installation:

 The status page auto-refreshes every 5 seconds.

+### Chat
+
+The **Chat** tab embeds the full Hermes TUI (the same interface you get from `hermes --tui`) directly in the browser. Everything you can do in the terminal TUI — slash commands, model picker, tool-call cards, markdown streaming, clarify/sudo/approval prompts, skin theming — works identically here, because the dashboard is running the real TUI binary and rendering its ANSI output through [xterm.js](https://xtermjs.org/) with its WebGL renderer for pixel-perfect cell layout.
+
+**How it works:**
+
+- `/api/pty` opens a WebSocket authenticated with the dashboard's session token
+- The server spawns `hermes --tui` behind a POSIX pseudo-terminal
+- Keystrokes travel to the PTY; ANSI output streams back to the browser
+- xterm.js's WebGL renderer paints each cell to an integer-pixel grid; mouse tracking (SGR 1006), wide characters (Unicode 11), and box-drawing glyphs all render natively
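+- Resizing the browser window resizes the TUI: `@xterm/addon-fit` refits the terminal grid to its container, and the new column/row count is forwarded to the server, which applies it to the PTY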
+
+**Resume an existing session:** from the **Sessions** tab, click the play icon (▶) next to any session. That jumps to `/chat?resume=<id>` and launches the TUI with `--resume`, loading the full history.
+
+**Prerequisites:**
+
+- Node.js (same requirement as `hermes --tui`; the TUI bundle is built on first launch)
+- `ptyprocess` — installed by the `pty` extra (`pip install 'hermes-agent[web,pty]'`, or `[all]` covers both)
+- POSIX kernel (Linux, macOS, or WSL). Native Windows Python is not supported — use WSL.
+
+Close the browser tab and the PTY is reaped cleanly on the server. Re-opening spawns a fresh session.
+
 ### Config

 A form-based editor for `config.yaml`. All 150+ configuration fields are auto-discovered from `DEFAULT_CONFIG` and organized into tabbed categories:
--- a/website/docs/user-guide/skills/bundled/media/media-spotify.md
+++ b/website/docs/user-guide/skills/bundled/media/media-spotify.md
@ -0,0 +1,150 @@
+---
+title: "Spotify"
+sidebar_label: "Spotify"
+description: "Control Spotify — play music, search the catalog, manage playlists and library, inspect devices and playback state"
+---
+
+{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */}
+
+# Spotify
+
+Control Spotify — play music, search the catalog, manage playlists and library, inspect devices and playback state. Loads when the user asks to play/pause/queue music, search tracks/albums/artists, manage playlists, or check what's playing. Assumes the Hermes Spotify toolset is enabled and `hermes auth spotify` has been run.
+
+## Skill metadata
+
+| | |
+|---|---|
+| Source | Bundled (installed by default) |
+| Path | `skills/media/spotify` |
+| Version | `1.0.0` |
+| Author | Hermes Agent |
+| License | MIT |
+| Tags | `spotify`, `music`, `playback`, `playlists`, `media` |
+| Related skills | [`gif-search`](/docs/user-guide/skills/bundled/media/media-gif-search) |
+
+## Reference: full SKILL.md
+
+:::info
+The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active.
+:::
+
+# Spotify
+
+Control the user's Spotify account via the Hermes Spotify toolset (7 tools). Setup guide: https://hermes-agent.nousresearch.com/docs/user-guide/features/spotify
+
+## When to use this skill
+
+The user says something like "play X", "pause", "skip", "queue up X", "what's playing", "search for X", "add to my X playlist", "make a playlist", "save this to my library", etc.
+
+## The 7 tools
+
+- `spotify_playback` — play, pause, next, previous, seek, set_repeat, set_shuffle, set_volume, get_state, get_currently_playing, recently_played
+- `spotify_devices` — list, transfer
+- `spotify_queue` — get, add
+- `spotify_search` — search the catalog
+- `spotify_playlists` — list, get, create, add_items, remove_items, update_details
+- `spotify_albums` — get, tracks
+- `spotify_library` — list/save/remove with `kind: "tracks"|"albums"`
+
+Playback-mutating actions require Spotify Premium; search/library/playlist ops work on Free.
+
+## Canonical patterns (minimize tool calls)
+
+### "Play &lt;artist/track/album>"
+One search, then play by URI. Do NOT loop through search results describing them unless the user asked for options.
+
+```
+spotify_search({"query": "miles davis kind of blue", "types": ["album"], "limit": 1})
+→ got album URI spotify:album:1weenld61qoidwYuZ1GESA
+spotify_playback({"action": "play", "context_uri": "spotify:album:1weenld61qoidwYuZ1GESA"})
+```
+
+For "play some &lt;artist>" (no specific song), prefer `types: ["artist"]` and play the artist context URI — Spotify handles smart shuffle. If the user says "the song" or "that track", search `types: ["track"]` and pass `uris: [track_uri]` to play.
+
+### "What's playing?" / "What am I listening to?"
+Single call — don't chain get_state after get_currently_playing.
+
+```
+spotify_playback({"action": "get_currently_playing"})
+```
+
+If it returns 204/empty (`is_playing: false`), tell the user nothing is playing. Don't retry.
+
+### "Pause" / "Skip" / "Volume 50"
+Direct action, no preflight inspection needed.
+
+```
+spotify_playback({"action": "pause"})
+spotify_playback({"action": "next"})
+spotify_playback({"action": "set_volume", "volume_percent": 50})
+```
+
+### "Add to my &lt;playlist name> playlist"
+1. `spotify_playlists list` to find the playlist ID by name
+2. Get the track URI (from currently playing, or search)
+3. `spotify_playlists add_items` with the playlist_id and URIs
+
+```
+spotify_playlists({"action": "list"})
+→ found "Late Night Jazz" = 37i9dQZF1DX4wta20PHgwo
+spotify_playback({"action": "get_currently_playing"})
+→ current track uri = spotify:track:0DiWol3AO6WpXZgp0goxAV
+spotify_playlists({"action": "add_items",
+                   "playlist_id": "37i9dQZF1DX4wta20PHgwo",
+                   "uris": ["spotify:track:0DiWol3AO6WpXZgp0goxAV"]})
+```
+
+### "Create a playlist called X and add the last 3 songs I played"
+```
+spotify_playback({"action": "recently_played", "limit": 3})
+spotify_playlists({"action": "create", "name": "Focus 2026"})
+→ got playlist_id back in response
+spotify_playlists({"action": "add_items", "playlist_id": <id>, "uris": [<3 uris>]})
+```
+
+### "Save / unsave / is this saved?"
+Use `spotify_library` with the right `kind`.
+
+```
+spotify_library({"kind": "tracks", "action": "save", "uris": ["spotify:track:..."]})
+spotify_library({"kind": "albums", "action": "list", "limit": 50})
+```
+
+### "Transfer playback to my &lt;device>"
+```
+spotify_devices({"action": "list"})
+→ pick the device_id by matching name/type
+spotify_devices({"action": "transfer", "device_id": "<id>", "play": true})
+```
+
+## Critical failure modes
+
+**`403 Forbidden — No active device found`** on any playback action means Spotify isn't running anywhere. Tell the user: "Open Spotify on your phone/desktop/web player first, start any track for a second, then retry." Don't retry the tool call blindly — it will fail the same way. You can call `spotify_devices list` to confirm; an empty list means no active device.
+
+**`403 Forbidden — Premium required`** means the user is on Free and tried to mutate playback. Don't retry; tell them this action needs Premium. Reads still work (search, playlists, library, get_state).
+
+**`204 No Content` on `get_currently_playing`** is NOT an error — it means nothing is playing. The tool returns `is_playing: false`. Just report that to the user.
+
+**`429 Too Many Requests`** = rate limit. Wait and retry once. If it keeps happening, you're looping — stop.
+
+**`401 Unauthorized` after a retry** — refresh token revoked. Tell the user to run `hermes auth spotify` again.
+
+## URI and ID formats
+
+Spotify uses three interchangeable ID formats. The tools accept all three and normalize:
+
+- URI: `spotify:track:0DiWol3AO6WpXZgp0goxAV` (preferred)
+- URL: `https://open.spotify.com/track/0DiWol3AO6WpXZgp0goxAV`
+- Bare ID: `0DiWol3AO6WpXZgp0goxAV`
+
+When in doubt, use full URIs. Search results return URIs in the `uri` field — pass those directly.
+
+Entity types: `track`, `album`, `artist`, `playlist`, `show`, `episode`. Use the right type for the action — `spotify_playback.play` with a `context_uri` expects album/playlist/artist; `uris` expects an array of track URIs.
+
+## What NOT to do
+
+- **Don't call `get_state` before every action.** Spotify accepts play/pause/skip without preflight. Only inspect state when the user asked "what's playing" or you need to reason about device/track.
+- **Don't describe search results unless asked.** If the user said "play X", search, grab the top URI, play it. They'll hear it's wrong if it's wrong.
+- **Don't retry on `403 Premium required` or `403 No active device`.** Those are permanent until user action.
+- **Don't use `spotify_search` to find a playlist by name** — that searches the public Spotify catalog. User playlists come from `spotify_playlists list`.
+- **Don't mix `kind: "tracks"` with album URIs** in `spotify_library` (or vice versa). The tool normalizes IDs but the API endpoint differs.
--- a/website/docs/user-guide/skills/bundled/mlops/mlops-models-segment-anything.md
+++ b/website/docs/user-guide/skills/bundled/mlops/mlops-models-segment-anything.md
@ -151,6 +151,7 @@ masks = processor.image_processor.post_process_masks(

 ### Model architecture

+<!-- ascii-guard-ignore -->
 ```
 SAM Architecture:
 ┌─────────────────┐     ┌─────────────────┐     ┌─────────────────┐
@ -161,6 +162,7 @@ SAM Architecture:
   Image Embeddings      Prompt Embeddings         Masks + IoU
   (computed once)       (per prompt)             predictions
 ```
+<!-- ascii-guard-ignore-end -->

 ### Model variants

--- a/website/docs/user-guide/skills/bundled/mlops/mlops-training-unsloth.md
+++ b/website/docs/user-guide/skills/bundled/mlops/mlops-training-unsloth.md
@ -94,4 +94,4 @@ To refresh this skill with updated documentation:
 1. Re-run the scraper with the same configuration
 2. The skill will be rebuilt with the latest information

-&lt;!-- Trigger re-upload 1763621536 -->
+<!-- Trigger re-upload 1763621536 -->
--- a/website/docs/user-guide/skills/bundled/research/research-research-paper-writing.md
+++ b/website/docs/user-guide/skills/bundled/research/research-research-paper-writing.md
@ -36,6 +36,7 @@ End-to-end pipeline for producing publication-ready ML/AI research papers target

 This is **not a linear pipeline** — it is an iterative loop. Results trigger new experiments. Reviews trigger new analysis. The agent must handle these feedback loops.

+<!-- ascii-guard-ignore -->
 ```
 ┌─────────────────────────────────────────────────────────────┐
 │                    RESEARCH PAPER PIPELINE                  │
@ -55,6 +56,7 @@ This is **not a linear pipeline** — it is an iterative loop. Results trigger n
 │                                                             │
 └─────────────────────────────────────────────────────────────┘
 ```
+<!-- ascii-guard-ignore-end -->

 ---

--- a/website/scripts/generate-skill-docs.py
+++ b/website/scripts/generate-skill-docs.py
@ -120,6 +120,14 @@ def mdx_escape_body(body: str) -> str:
                elif ch == "}":
                    out.append("&#125;")
                elif ch == "<":
+                    # Preserve full HTML comments (e.g. ascii-guard ignore markers) — they
+                    # are not HTML tags, so the tag regex below would escape the leading <.
+                    if text[i:].startswith("<!--"):
+                        end = text.find("-->", i)
+                        if end != -1:
+                            out.append(text[i : end + 3])
+                            i = end + 3
+                            continue
                    # Look ahead to see if this is a valid HTML-ish tag.
                    # If it looks like a tag name then alnum/-/_ chars, leave it.
                    # Otherwise escape.
--- a/website/sidebars.ts
+++ b/website/sidebars.ts
@ -216,6 +216,7 @@ const sidebars: SidebarsConfig = {
                'user-guide/skills/bundled/media/media-gif-search',
                'user-guide/skills/bundled/media/media-heartmula',
                'user-guide/skills/bundled/media/media-songsee',
+                'user-guide/skills/bundled/media/media-spotify',
                'user-guide/skills/bundled/media/media-youtube-content',
              ],
            },