mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-01 12:02:05 +00:00
fix(agent,gateway): surface partial-stream recovery and bound detached restart
Salvage of NousResearch/hermes-agent#41498 (0-CYBERDYNE-SYSTEMS-0). - Leave response_previewed false on partial_stream_recovery so gateway fallback delivery can send the recovered fragment plus explanation. - Always append the turn-completion explainer for partial_stream_recovery, not only for empty or very short fragments (#34452 gap). - Launch the detached /restart helper before drain, idempotently, with a bounded wait of restart_drain_timeout + 5s.
This commit is contained in:
parent
e3c9924b8b
commit
e860a40e14
3 changed files with 30 additions and 6 deletions
|
|
@ -4392,7 +4392,11 @@ def run_conversation(
|
|||
"as final response"
|
||||
)
|
||||
final_response = _recovered
|
||||
agent._response_was_previewed = True
|
||||
# Streaming delivered a fragment, not a confirmed
|
||||
# final preview. Leave response_previewed false so
|
||||
# gateway fallback delivery can send the recovered
|
||||
# text plus the abnormal-turn explanation.
|
||||
agent._response_was_previewed = False
|
||||
break
|
||||
|
||||
# If the previous turn already delivered real content alongside
|
||||
|
|
|
|||
|
|
@ -289,7 +289,14 @@ def finalize_turn(
|
|||
and len(_stripped) <= 24
|
||||
and _stripped[-1:] not in {".", "!", "?", "。", "!", "?", "`", ")"}
|
||||
)
|
||||
if _is_empty_terminal or _is_partial_fragment:
|
||||
_is_partial_stream_recovery = (
|
||||
str(_turn_exit_reason) == "partial_stream_recovery"
|
||||
)
|
||||
if (
|
||||
_is_empty_terminal
|
||||
or _is_partial_fragment
|
||||
or _is_partial_stream_recovery
|
||||
):
|
||||
_explanation = agent._format_turn_completion_explanation(
|
||||
_turn_exit_reason
|
||||
)
|
||||
|
|
|
|||
|
|
@ -2527,6 +2527,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
|
|||
_restart_task_started: bool = False
|
||||
_restart_detached: bool = False
|
||||
_restart_via_service: bool = False
|
||||
_detached_restart_helper_started: bool = False
|
||||
_restart_command_source: Optional[SessionSource] = None
|
||||
_stop_task: Optional[asyncio.Task] = None
|
||||
_restart_task: Optional[asyncio.Task] = None
|
||||
|
|
@ -2620,6 +2621,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
|
|||
self._restart_task_started = False
|
||||
self._restart_detached = False
|
||||
self._restart_via_service = False
|
||||
self._detached_restart_helper_started = False
|
||||
self._restart_command_source: Optional[SessionSource] = None
|
||||
self._stop_task: Optional[asyncio.Task] = None
|
||||
self._restart_task: Optional[asyncio.Task] = None
|
||||
|
|
@ -5360,8 +5362,12 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
|
|||
if not hermes_cmd:
|
||||
logger.error("Could not locate hermes binary for detached /restart")
|
||||
return
|
||||
if self._detached_restart_helper_started:
|
||||
return
|
||||
self._detached_restart_helper_started = True
|
||||
|
||||
current_pid = os.getpid()
|
||||
restart_after_s = max(float(getattr(self, "_restart_drain_timeout", 0.0) or 0.0) + 5.0, 5.0)
|
||||
|
||||
# On Windows there's no bash/setsid chain — spawn a tiny Python
|
||||
# watcher directly via sys.executable instead. The watcher polls
|
||||
|
|
@ -5378,8 +5384,9 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
|
|||
import os, subprocess, sys, time
|
||||
from hermes_cli._subprocess_compat import windows_detach_flags_without_breakaway
|
||||
pid = int(sys.argv[1])
|
||||
cmd = sys.argv[2:]
|
||||
deadline = time.monotonic() + 120
|
||||
restart_after_s = float(sys.argv[2])
|
||||
cmd = sys.argv[3:]
|
||||
deadline = time.monotonic() + restart_after_s
|
||||
|
||||
def _alive(p):
|
||||
# On Windows, os.kill(pid, 0) is NOT a no-op — it maps to
|
||||
|
|
@ -5435,7 +5442,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
|
|||
pythonpath.append(watcher_env["PYTHONPATH"])
|
||||
watcher_env["PYTHONPATH"] = os.pathsep.join(dict.fromkeys(pythonpath))
|
||||
subprocess.Popen(
|
||||
[sys.executable, "-c", watcher, str(current_pid), *cmd_argv],
|
||||
[sys.executable, "-c", watcher, str(current_pid), str(restart_after_s), *cmd_argv],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
env=watcher_env,
|
||||
|
|
@ -5445,7 +5452,8 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
|
|||
|
||||
cmd = " ".join(shlex.quote(part) for part in hermes_cmd)
|
||||
shell_cmd = (
|
||||
f"while kill -0 {current_pid} 2>/dev/null; do sleep 0.2; done; "
|
||||
f"deadline=$(( $(date +%s) + {int(restart_after_s)} )); "
|
||||
f"while kill -0 {current_pid} 2>/dev/null && [ $(date +%s) -lt $deadline ]; do sleep 0.2; done; "
|
||||
f"{cmd} gateway restart"
|
||||
)
|
||||
# Same marker scrub as the Windows watcher above: this watcher runs
|
||||
|
|
@ -5559,6 +5567,11 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
|
|||
self._restart_task_started = True
|
||||
|
||||
async def _run_restart() -> None:
|
||||
if detached:
|
||||
try:
|
||||
await self._launch_detached_restart_command()
|
||||
except Exception as e:
|
||||
logger.error("Failed to launch detached gateway restart helper: %s", e)
|
||||
await asyncio.sleep(0.05)
|
||||
await self.stop(restart=True, detached_restart=detached, service_restart=via_service)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue