Merge remote-tracking branch 'origin/main' into bb/grok-provider-desktop

This commit is contained in:
Brooklyn Nicholson 2026-06-02 18:41:32 -05:00
commit 123b945731
4 changed files with 76 additions and 33 deletions

View file

@ -37,23 +37,16 @@ jobs:
- name: Check flake
id: flake
if: runner.os == 'Linux'
continue-on-error: true
run: nix flake check --print-build-logs
- name: Build package
id: build
if: runner.os == 'Linux'
continue-on-error: true
run: nix build --print-build-logs
# When the real Nix build fails, run a targeted diagnostic to see if
# When the flake check fails, run a targeted diagnostic to see if
# the failure is specifically a stale npm lockfile hash in one of the
# known npm subpackages (tui / web). This avoids surfacing a generic
# "build failed" message when the fix is a single known command.
- name: Diagnose npm lockfile hashes
id: hash_check
if: (steps.flake.outcome == 'failure' || steps.build.outcome == 'failure') && runner.os == 'Linux'
if: steps.flake.outcome == 'failure' && runner.os == 'Linux'
continue-on-error: true
env:
LINK_SHA: ${{ steps.sha.outputs.full }}
@ -88,30 +81,25 @@ jobs:
- Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
- Or locally: `nix run .#fix-lockfiles` and commit the diff
# Clear the sticky comment when either the build passed outright (no
# Clear the sticky comment when either the flake check passed outright (no
# hash check needed) or the hash check explicitly returned stale=false
# (build failed for a non-hash reason).
# (check failed for a non-hash reason).
- name: Clear sticky PR comment (resolved)
if: |
github.event_name == 'pull_request' &&
runner.os == 'Linux' &&
(steps.hash_check.outputs.stale == 'false' ||
(steps.flake.outcome == 'success' && steps.build.outcome == 'success'))
steps.flake.outcome == 'success')
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
with:
header: nix-lockfile-check
delete: true
- name: Final fail if build or flake failed
if: steps.flake.outcome == 'failure' || steps.build.outcome == 'failure'
- name: Final fail if flake check failed
if: steps.flake.outcome == 'failure'
run: |
if [ "${{ steps.hash_check.outputs.stale }}" == "true" ]; then
echo "::error::Nix build failed due to stale npm lockfile hash. Run: nix run .#fix-lockfiles"
else
echo "::error::Nix build/flake check failed. See logs above."
echo "::error::Nix flake check failed. See logs above."
fi
exit 1
- name: Evaluate flake (macOS)
if: runner.os == 'macOS'
run: nix flake show --json > /dev/null

View file

@ -9,6 +9,8 @@ Usage:
python -m hermes_cli.main web --port 8080
"""
from contextlib import asynccontextmanager
import asyncio
import base64
import binascii
@ -84,7 +86,43 @@ except ImportError:
WEB_DIST = Path(os.environ["HERMES_WEB_DIST"]) if "HERMES_WEB_DIST" in os.environ else Path(__file__).parent / "web_dist"
_log = logging.getLogger(__name__)
app = FastAPI(title="Hermes Agent", version=__version__)
# ---------------------------------------------------------------------------
# Per-channel subscriber registry used by /api/pub (PTY-side gateway → dashboard)
# and /api/events (dashboard → browser sidebar). Keyed by an opaque channel id
# the chat tab generates on mount; entries auto-evict when the last subscriber
# drops AND the publisher has disconnected.
#
# State lives on app.state (not module-level globals) so that asyncio.Lock is
# created on the running event loop during lifespan startup. A module-level
# asyncio.Lock() ends up tied to a single event loop (the loop current at
# import time on Python <= 3.9, or the first loop that contends for it on
# 3.10+), which breaks when the same module is used across TestClient
# instances or uvicorn reloads.
# ---------------------------------------------------------------------------
@asynccontextmanager
async def _lifespan(app: "FastAPI"):
app.state.event_channels = {} # dict[str, set]
app.state.event_lock = asyncio.Lock()
yield
def _get_event_state(app: "FastAPI"):
"""Return (event_channels, event_lock) from app.state.
Lazily initialises the state if the lifespan hasn't run (e.g. when
TestClient is constructed without a ``with`` block). The lifespan
path is preferred because it guarantees the Lock is created on the
correct event loop, but the lazy path lets existing non-``with``
TestClient usages keep working.
"""
try:
return app.state.event_channels, app.state.event_lock
except AttributeError:
app.state.event_channels = {}
app.state.event_lock = asyncio.Lock()
return app.state.event_channels, app.state.event_lock
app = FastAPI(title="Hermes Agent", version=__version__, lifespan=_lifespan)
# ---------------------------------------------------------------------------
# Session token for protecting sensitive endpoints (reveal).
@ -6631,8 +6669,7 @@ def _ws_auth_ok(ws: "WebSocket") -> bool:
# and /api/events (dashboard → browser sidebar). Keyed by an opaque channel id
# the chat tab generates on mount; entries auto-evict when the last subscriber
# drops AND the publisher has disconnected.
_event_channels: dict[str, set] = {}
_event_lock = asyncio.Lock()
# (State is initialised in _lifespan on app startup — see above.)
def _resolve_chat_argv(
@ -6741,10 +6778,11 @@ def _build_sidecar_url(channel: str) -> Optional[str]:
return f"ws://{netloc}/api/pub?{qs}"
async def _broadcast_event(channel: str, payload: str) -> None:
async def _broadcast_event(app: Any, channel: str, payload: str) -> None:
"""Fan out one publisher frame to every subscriber on `channel`."""
async with _event_lock:
subs = list(_event_channels.get(channel, ()))
event_channels, event_lock = _get_event_state(app)
async with event_lock:
subs = list(event_channels.get(channel, ()))
for sub in subs:
try:
@ -6935,7 +6973,7 @@ async def pub_ws(ws: WebSocket) -> None:
try:
while True:
await _broadcast_event(channel, await ws.receive_text())
await _broadcast_event(ws.app, channel, await ws.receive_text())
except WebSocketDisconnect:
pass
@ -6961,8 +6999,9 @@ async def events_ws(ws: WebSocket) -> None:
await ws.accept()
async with _event_lock:
_event_channels.setdefault(channel, set()).add(ws)
event_channels, event_lock = _get_event_state(ws.app)
async with event_lock:
event_channels.setdefault(channel, set()).add(ws)
try:
while True:
@ -6973,14 +7012,14 @@ async def events_ws(ws: WebSocket) -> None:
except WebSocketDisconnect:
pass
finally:
async with _event_lock:
subs = _event_channels.get(channel)
async with event_lock:
subs = event_channels.get(channel)
if subs is not None:
subs.discard(ws)
if not subs:
_event_channels.pop(channel, None)
event_channels.pop(channel, None)
def _normalise_prefix(raw: Optional[str]) -> str:

View file

@ -58,6 +58,22 @@ json.dump(sorted(leaf_paths(DEFAULT_CONFIG)), sys.stdout, indent=2)
echo "ok" > $out/result
''
);
# Verify the default package builds successfully (cross-platform).
# On Linux the runtime checks below already depend on the package,
# but this ensures darwin builders also build it during flake check.
# The script interpolates the hermes-agent store path, so realising this
# check requires the package to be built first; the check itself only
# prints a PASS line and writes a marker file to $out/result.
build-package = pkgs.runCommand "hermes-build-package" { } ''
echo "PASS: package built at ${hermes-agent}"
mkdir -p $out
echo "ok" > $out/result
'';
# Verify the devShell builds successfully (cross-platform).
# Interpolating self'.devShells.default makes the default devShell a build
# input of this check, so it must build before the script runs; the script
# then prints a PASS line and writes a marker file to $out/result.
build-devshell = pkgs.runCommand "hermes-build-devshell" { } ''
echo "PASS: devShell built at ${self'.devShells.default}"
mkdir -p $out
echo "ok" > $out/result
'';
} // lib.optionalAttrs pkgs.stdenv.hostPlatform.isLinux {
# Verify binaries exist and are executable
package-contents = pkgs.runCommand "hermes-package-contents" { } ''

View file

@ -3415,7 +3415,7 @@ class TestPtyWebSocket:
# subscriber registration and the message is dropped.
deadline = time.monotonic() + 5.0
while time.monotonic() < deadline:
if ws_mod._event_channels.get("broadcast-test"):
if ws_mod.app.state.event_channels.get("broadcast-test"):
break
time.sleep(0.01)
else: