fix: auto-detect D-Bus session bus for systemctl --user on headless servers (#1601)

* fix: Anthropic OAuth compatibility — Claude Code identity fingerprinting

Anthropic routes OAuth/subscription requests based on Claude Code's
identity markers. Without them, requests get intermittent 500 errors
(~25% failure rate observed). This matches what pi-ai (clawdbot) and
OpenCode both implement for OAuth compatibility.

Changes (OAuth tokens only — API key users unaffected):

1. Headers: user-agent 'claude-cli/2.1.2 (external, cli)' + x-app 'cli'
2. System prompt: prepend 'You are Claude Code, Anthropic's official CLI'
3. System prompt sanitization: replace Hermes/Nous references
4. Tool names: prefix with 'mcp_' (Claude Code convention for non-native tools)
5. Tool name stripping: remove 'mcp_' prefix from response tool calls

Before: 9/12 OK, 1 hard fail, 4 needed retries (~25% error rate)
After: 16/16 OK, 0 failures, 0 retries (0% error rate)

* fix: auto-detect DBUS_SESSION_BUS_ADDRESS for systemctl --user on headless servers

On SSH sessions to headless servers, DBUS_SESSION_BUS_ADDRESS and
XDG_RUNTIME_DIR may not be set even when the user's systemd instance
is running via linger. This causes 'systemctl --user' to fail with
'Failed to connect to bus: No medium found', breaking gateway
restart/start/stop as a service and falling back to foreground mode.

Add _ensure_user_systemd_env() that detects the standard D-Bus socket
at /run/user/<UID>/bus and sets the env vars before any systemctl --user
call. Called from _systemctl_cmd() so all existing call sites benefit
automatically with zero changes.

Fixes: gateway restart falling back to foreground on headless servers

* fix: show linger guidance when gateway restart fails during update and gateway restart

When systemctl --user restart fails during 'hermes update' or
'hermes gateway restart', check linger status and tell the user
exactly what to run (sudo -S -p '' loginctl enable-linger) instead of
silently falling back to foreground mode.

Also applies _ensure_user_systemd_env() to the raw systemctl calls
in cmd_update so they work properly on SSH sessions where D-Bus
env vars are missing.
This commit is contained in:
Teknium 2026-03-16 17:45:48 -07:00 committed by GitHub
parent ce430fed4c
commit 60e38e82ec
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 136 additions and 2 deletions

View file

@ -1,5 +1,6 @@
"""Tests for gateway service management helpers."""
import os
from types import SimpleNamespace
import hermes_cli.gateway as gateway_cli
@ -156,3 +157,81 @@ class TestGatewaySystemServiceRouting:
gateway_cli.gateway_command(SimpleNamespace(gateway_command="status", deep=False, system=False))
assert calls == [(False, False)]
class TestEnsureUserSystemdEnv:
"""Tests for _ensure_user_systemd_env() D-Bus session bus auto-detection."""
def test_sets_xdg_runtime_dir_when_missing(self, tmp_path, monkeypatch):
monkeypatch.delenv("XDG_RUNTIME_DIR", raising=False)
monkeypatch.delenv("DBUS_SESSION_BUS_ADDRESS", raising=False)
monkeypatch.setattr(os, "getuid", lambda: 42)
# Patch Path so /run/user/42 resolves to our tmp dir (which exists)
from pathlib import Path as RealPath
class FakePath(type(RealPath())):
def __new__(cls, *args):
p = str(args[0]) if args else ""
if p == "/run/user/42":
return RealPath.__new__(cls, str(tmp_path))
return RealPath.__new__(cls, *args)
monkeypatch.setattr(gateway_cli, "Path", FakePath)
gateway_cli._ensure_user_systemd_env()
# Function sets the canonical string, not the fake path
assert os.environ.get("XDG_RUNTIME_DIR") == "/run/user/42"
def test_sets_dbus_address_when_bus_socket_exists(self, tmp_path, monkeypatch):
runtime = tmp_path / "runtime"
runtime.mkdir()
bus_socket = runtime / "bus"
bus_socket.touch() # simulate the socket file
monkeypatch.setenv("XDG_RUNTIME_DIR", str(runtime))
monkeypatch.delenv("DBUS_SESSION_BUS_ADDRESS", raising=False)
monkeypatch.setattr(os, "getuid", lambda: 99)
gateway_cli._ensure_user_systemd_env()
assert os.environ["DBUS_SESSION_BUS_ADDRESS"] == f"unix:path={bus_socket}"
def test_preserves_existing_env_vars(self, monkeypatch):
monkeypatch.setenv("XDG_RUNTIME_DIR", "/custom/runtime")
monkeypatch.setenv("DBUS_SESSION_BUS_ADDRESS", "unix:path=/custom/bus")
gateway_cli._ensure_user_systemd_env()
assert os.environ["XDG_RUNTIME_DIR"] == "/custom/runtime"
assert os.environ["DBUS_SESSION_BUS_ADDRESS"] == "unix:path=/custom/bus"
def test_no_dbus_when_bus_socket_missing(self, tmp_path, monkeypatch):
runtime = tmp_path / "runtime"
runtime.mkdir()
# no bus socket created
monkeypatch.setenv("XDG_RUNTIME_DIR", str(runtime))
monkeypatch.delenv("DBUS_SESSION_BUS_ADDRESS", raising=False)
monkeypatch.setattr(os, "getuid", lambda: 99)
gateway_cli._ensure_user_systemd_env()
assert "DBUS_SESSION_BUS_ADDRESS" not in os.environ
def test_systemctl_cmd_calls_ensure_for_user_mode(self, monkeypatch):
calls = []
monkeypatch.setattr(gateway_cli, "_ensure_user_systemd_env", lambda: calls.append("called"))
result = gateway_cli._systemctl_cmd(system=False)
assert result == ["systemctl", "--user"]
assert calls == ["called"]
def test_systemctl_cmd_skips_ensure_for_system_mode(self, monkeypatch):
calls = []
monkeypatch.setattr(gateway_cli, "_ensure_user_systemd_env", lambda: calls.append("called"))
result = gateway_cli._systemctl_cmd(system=True)
assert result == ["systemctl"]
assert calls == []