mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(honcho): isolate session routing for multi-user gateway (#1500)
Salvaged from PR #1470 by adavyas.

Core fix: Honcho tool calls in a multi-session gateway could route to the wrong session because honcho_tools.py relied on process-global state. Now threads session context through the call chain:

AIAgent._invoke_tool() → handle_function_call() → registry.dispatch() → handler **kw → _resolve_session_context()

Changes:
- Add _resolve_session_context() to prefer per-call context over globals
- Plumb honcho_manager + honcho_session_key through handle_function_call
- Add sync_honcho=False to run_conversation() for synthetic flush turns
- Pass honcho_session_key through gateway memory flush lifecycle
- Harden gateway PID detection when /proc cmdline is unreadable
- Make interrupt test scripts import-safe for pytest-xdist
- Wrap BibTeX examples in Jekyll raw blocks for docs build
- Fix thread-order-dependent assertion in client lifecycle test
- Expand Honcho docs: session isolation, lifecycle, routing internals

Dropped from original PR:
- Indentation change in _create_request_openai_client that would move client creation inside the lock (causes unnecessary contention)

Co-authored-by: adavyas <adavyas@users.noreply.github.com>
This commit is contained in:
parent
eb4f0348e1
commit
dd7921d514
17 changed files with 522 additions and 252 deletions
|
|
@ -16,126 +16,131 @@ from run_agent import AIAgent, IterationBudget
|
|||
from tools.delegate_tool import _run_single_child
|
||||
from tools.interrupt import set_interrupt, is_interrupted
|
||||
|
||||
set_interrupt(False)
|
||||
def main() -> int:
|
||||
set_interrupt(False)
|
||||
|
||||
# Create parent agent (minimal)
|
||||
parent = AIAgent.__new__(AIAgent)
|
||||
parent._interrupt_requested = False
|
||||
parent._interrupt_message = None
|
||||
parent._active_children = []
|
||||
parent.quiet_mode = True
|
||||
parent.model = "test/model"
|
||||
parent.base_url = "http://localhost:1"
|
||||
parent.api_key = "test"
|
||||
parent.provider = "test"
|
||||
parent.api_mode = "chat_completions"
|
||||
parent.platform = "cli"
|
||||
parent.enabled_toolsets = ["terminal", "file"]
|
||||
parent.providers_allowed = None
|
||||
parent.providers_ignored = None
|
||||
parent.providers_order = None
|
||||
parent.provider_sort = None
|
||||
parent.max_tokens = None
|
||||
parent.reasoning_config = None
|
||||
parent.prefill_messages = None
|
||||
parent._session_db = None
|
||||
parent._delegate_depth = 0
|
||||
parent._delegate_spinner = None
|
||||
parent.tool_progress_callback = None
|
||||
parent.iteration_budget = IterationBudget(max_total=100)
|
||||
parent._client_kwargs = {"api_key": "test", "base_url": "http://localhost:1"}
|
||||
# Create parent agent (minimal)
|
||||
parent = AIAgent.__new__(AIAgent)
|
||||
parent._interrupt_requested = False
|
||||
parent._interrupt_message = None
|
||||
parent._active_children = []
|
||||
parent.quiet_mode = True
|
||||
parent.model = "test/model"
|
||||
parent.base_url = "http://localhost:1"
|
||||
parent.api_key = "test"
|
||||
parent.provider = "test"
|
||||
parent.api_mode = "chat_completions"
|
||||
parent.platform = "cli"
|
||||
parent.enabled_toolsets = ["terminal", "file"]
|
||||
parent.providers_allowed = None
|
||||
parent.providers_ignored = None
|
||||
parent.providers_order = None
|
||||
parent.provider_sort = None
|
||||
parent.max_tokens = None
|
||||
parent.reasoning_config = None
|
||||
parent.prefill_messages = None
|
||||
parent._session_db = None
|
||||
parent._delegate_depth = 0
|
||||
parent._delegate_spinner = None
|
||||
parent.tool_progress_callback = None
|
||||
parent.iteration_budget = IterationBudget(max_total=100)
|
||||
parent._client_kwargs = {"api_key": "test", "base_url": "http://localhost:1"}
|
||||
|
||||
child_started = threading.Event()
|
||||
result_holder = [None]
|
||||
child_started = threading.Event()
|
||||
result_holder = [None]
|
||||
|
||||
def run_delegate():
|
||||
with patch("run_agent.OpenAI") as MockOpenAI:
|
||||
mock_client = MagicMock()
|
||||
|
||||
def run_delegate():
|
||||
with patch("run_agent.OpenAI") as MockOpenAI:
|
||||
mock_client = MagicMock()
|
||||
def slow_create(**kwargs):
|
||||
time.sleep(3)
|
||||
resp = MagicMock()
|
||||
resp.choices = [MagicMock()]
|
||||
resp.choices[0].message.content = "Done"
|
||||
resp.choices[0].message.tool_calls = None
|
||||
resp.choices[0].message.refusal = None
|
||||
resp.choices[0].finish_reason = "stop"
|
||||
resp.usage.prompt_tokens = 100
|
||||
resp.usage.completion_tokens = 10
|
||||
resp.usage.total_tokens = 110
|
||||
resp.usage.prompt_tokens_details = None
|
||||
return resp
|
||||
|
||||
def slow_create(**kwargs):
|
||||
time.sleep(3)
|
||||
resp = MagicMock()
|
||||
resp.choices = [MagicMock()]
|
||||
resp.choices[0].message.content = "Done"
|
||||
resp.choices[0].message.tool_calls = None
|
||||
resp.choices[0].message.refusal = None
|
||||
resp.choices[0].finish_reason = "stop"
|
||||
resp.usage.prompt_tokens = 100
|
||||
resp.usage.completion_tokens = 10
|
||||
resp.usage.total_tokens = 110
|
||||
resp.usage.prompt_tokens_details = None
|
||||
return resp
|
||||
mock_client.chat.completions.create = slow_create
|
||||
mock_client.close = MagicMock()
|
||||
MockOpenAI.return_value = mock_client
|
||||
|
||||
mock_client.chat.completions.create = slow_create
|
||||
mock_client.close = MagicMock()
|
||||
MockOpenAI.return_value = mock_client
|
||||
original_init = AIAgent.__init__
|
||||
|
||||
original_init = AIAgent.__init__
|
||||
def patched_init(self_agent, *a, **kw):
|
||||
original_init(self_agent, *a, **kw)
|
||||
child_started.set()
|
||||
|
||||
def patched_init(self_agent, *a, **kw):
|
||||
original_init(self_agent, *a, **kw)
|
||||
child_started.set()
|
||||
with patch.object(AIAgent, "__init__", patched_init):
|
||||
try:
|
||||
result = _run_single_child(
|
||||
task_index=0,
|
||||
goal="Test slow task",
|
||||
context=None,
|
||||
toolsets=["terminal"],
|
||||
model="test/model",
|
||||
max_iterations=5,
|
||||
parent_agent=parent,
|
||||
task_count=1,
|
||||
override_provider="test",
|
||||
override_base_url="http://localhost:1",
|
||||
override_api_key="test",
|
||||
override_api_mode="chat_completions",
|
||||
)
|
||||
result_holder[0] = result
|
||||
except Exception as e:
|
||||
print(f"ERROR in delegate: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
|
||||
with patch.object(AIAgent, "__init__", patched_init):
|
||||
try:
|
||||
result = _run_single_child(
|
||||
task_index=0,
|
||||
goal="Test slow task",
|
||||
context=None,
|
||||
toolsets=["terminal"],
|
||||
model="test/model",
|
||||
max_iterations=5,
|
||||
parent_agent=parent,
|
||||
task_count=1,
|
||||
override_provider="test",
|
||||
override_base_url="http://localhost:1",
|
||||
override_api_key="test",
|
||||
override_api_mode="chat_completions",
|
||||
)
|
||||
result_holder[0] = result
|
||||
except Exception as e:
|
||||
print(f"ERROR in delegate: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
print("Starting agent thread...")
|
||||
agent_thread = threading.Thread(target=run_delegate, daemon=True)
|
||||
agent_thread.start()
|
||||
|
||||
started = child_started.wait(timeout=10)
|
||||
if not started:
|
||||
print("ERROR: Child never started")
|
||||
set_interrupt(False)
|
||||
return 1
|
||||
|
||||
print("Starting agent thread...")
|
||||
agent_thread = threading.Thread(target=run_delegate, daemon=True)
|
||||
agent_thread.start()
|
||||
time.sleep(0.5)
|
||||
|
||||
started = child_started.wait(timeout=10)
|
||||
if not started:
|
||||
print("ERROR: Child never started")
|
||||
sys.exit(1)
|
||||
print(f"Active children: {len(parent._active_children)}")
|
||||
for i, c in enumerate(parent._active_children):
|
||||
print(f" Child {i}: _interrupt_requested={c._interrupt_requested}")
|
||||
|
||||
time.sleep(0.5)
|
||||
t0 = time.monotonic()
|
||||
parent.interrupt("User typed a new message")
|
||||
print("Called parent.interrupt()")
|
||||
|
||||
print(f"Active children: {len(parent._active_children)}")
|
||||
for i, c in enumerate(parent._active_children):
|
||||
print(f" Child {i}: _interrupt_requested={c._interrupt_requested}")
|
||||
for i, c in enumerate(parent._active_children):
|
||||
print(f" Child {i} after interrupt: _interrupt_requested={c._interrupt_requested}")
|
||||
print(f"Global is_interrupted: {is_interrupted()}")
|
||||
|
||||
t0 = time.monotonic()
|
||||
parent.interrupt("User typed a new message")
|
||||
print(f"Called parent.interrupt()")
|
||||
agent_thread.join(timeout=10)
|
||||
elapsed = time.monotonic() - t0
|
||||
print(f"Agent thread finished in {elapsed:.2f}s")
|
||||
|
||||
for i, c in enumerate(parent._active_children):
|
||||
print(f" Child {i} after interrupt: _interrupt_requested={c._interrupt_requested}")
|
||||
print(f"Global is_interrupted: {is_interrupted()}")
|
||||
|
||||
agent_thread.join(timeout=10)
|
||||
elapsed = time.monotonic() - t0
|
||||
print(f"Agent thread finished in {elapsed:.2f}s")
|
||||
|
||||
result = result_holder[0]
|
||||
if result:
|
||||
print(f"Status: {result['status']}")
|
||||
print(f"Duration: {result['duration_seconds']}s")
|
||||
if elapsed < 2.0:
|
||||
print("✅ PASS: Interrupt detected quickly!")
|
||||
result = result_holder[0]
|
||||
if result:
|
||||
print(f"Status: {result['status']}")
|
||||
print(f"Duration: {result['duration_seconds']}s")
|
||||
if elapsed < 2.0:
|
||||
print("✅ PASS: Interrupt detected quickly!")
|
||||
else:
|
||||
print(f"❌ FAIL: Took {elapsed:.2f}s — interrupt was too slow or not detected")
|
||||
else:
|
||||
print(f"❌ FAIL: Took {elapsed:.2f}s — interrupt was too slow or not detected")
|
||||
else:
|
||||
print("❌ FAIL: No result!")
|
||||
print("❌ FAIL: No result!")
|
||||
|
||||
set_interrupt(False)
|
||||
set_interrupt(False)
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue