mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
changes
This commit is contained in:
parent
c5386ed7e6
commit
e698b7e0e5
19 changed files with 3924 additions and 132 deletions
387
api_endpoint/websocket_logger.py
Normal file
387
api_endpoint/websocket_logger.py
Normal file
|
|
@ -0,0 +1,387 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
WebSocket Logger Client
|
||||
|
||||
Simple client for sending agent events to the logging server via WebSocket.
|
||||
Used by the agent to log events in real-time during execution.
|
||||
"""
|
||||
|
||||
import json
|
||||
import asyncio
|
||||
from typing import Dict, Any, Optional
|
||||
from datetime import datetime
|
||||
import websockets
|
||||
|
||||
|
||||
class WebSocketLogger:
|
||||
"""
|
||||
Client for logging agent events via WebSocket.
|
||||
|
||||
Usage:
|
||||
logger = WebSocketLogger("unique-session-id")
|
||||
await logger.connect()
|
||||
await logger.log_query("What is Python?", model="gpt-4")
|
||||
await logger.log_api_call(call_number=1)
|
||||
await logger.log_response(call_number=1, content="Python is...")
|
||||
await logger.disconnect()
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
session_id: str,
|
||||
server_url: str = "ws://localhost:8000/ws",
|
||||
enabled: bool = True
|
||||
):
|
||||
"""
|
||||
Initialize WebSocket logger.
|
||||
|
||||
Args:
|
||||
session_id: Unique identifier for this agent session
|
||||
server_url: WebSocket server URL (default: ws://localhost:8000/ws)
|
||||
enabled: Whether logging is enabled (default: True)
|
||||
"""
|
||||
self.session_id = session_id
|
||||
self.server_url = server_url
|
||||
self.enabled = enabled
|
||||
self.websocket: Optional[websockets.WebSocketClientProtocol] = None
|
||||
self.connected = False
|
||||
self.reconnect_count = 0 # Track reconnections for debugging
|
||||
|
||||
async def connect(self):
|
||||
"""
|
||||
Connect to the WebSocket logging server.
|
||||
|
||||
Establishes WebSocket connection and sends initial session_start event.
|
||||
If connection fails, gracefully disables logging (agent continues normally).
|
||||
"""
|
||||
if not self.enabled:
|
||||
return
|
||||
|
||||
try:
|
||||
# Establish WebSocket connection to the server
|
||||
# Use VERY LONG ping intervals to avoid timeout during long tool execution
|
||||
# The event loop is blocked during tool execution, so we can't process pings
|
||||
# Setting to very large values (1 hour) effectively disables it
|
||||
self.websocket = await websockets.connect(
|
||||
self.server_url,
|
||||
ping_interval=3600, # 1 hour - effectively disabled (event loop blocked anyway)
|
||||
ping_timeout=3600, # 1 hour timeout for pong response
|
||||
close_timeout=10, # Timeout for close handshake
|
||||
max_size=10 * 1024 * 1024, # 10MB max message size (for large raw_results)
|
||||
open_timeout=10 # Timeout for initial connection
|
||||
)
|
||||
self.connected = True
|
||||
print(f"✅ Connected to logging server (ping/pong: 3600s intervals): {self.server_url}")
|
||||
|
||||
# Send initial session_start event
|
||||
# This tells the server to create a new SessionLogger for this session
|
||||
await self._send_event("session_start", {
|
||||
"session_id": self.session_id,
|
||||
"start_time": datetime.now().isoformat()
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
# Connection failed - disable logging but don't crash the agent
|
||||
print(f"⚠️ Failed to connect to logging server: {e}")
|
||||
print(f" Logging will be disabled for this session.")
|
||||
self.enabled = False
|
||||
self.connected = False
|
||||
|
||||
async def disconnect(self):
|
||||
"""Disconnect from the WebSocket server."""
|
||||
if self.websocket and self.connected:
|
||||
try:
|
||||
await self.websocket.close()
|
||||
self.connected = False
|
||||
print(f"✅ Disconnected from logging server")
|
||||
except Exception as e:
|
||||
print(f"⚠️ Error disconnecting: {e}")
|
||||
|
||||
async def _send_event(self, event_type: str, data: Dict[str, Any]):
|
||||
"""
|
||||
Send an event to the logging server.
|
||||
|
||||
This is the core method that sends all events via WebSocket.
|
||||
Creates a standardized message format and handles acknowledgments.
|
||||
|
||||
Args:
|
||||
event_type: Type of event (query, api_call, response, tool_call, tool_result, error, complete)
|
||||
data: Event data dictionary containing event-specific information
|
||||
"""
|
||||
# Safety check: Don't send if logging is disabled
|
||||
if not self.enabled:
|
||||
return
|
||||
|
||||
# Auto-reconnect if connection was lost
|
||||
if not self.connected or not self.websocket:
|
||||
try:
|
||||
self.reconnect_count += 1
|
||||
print(f"🔄 Reconnecting to logging server (attempt #{self.reconnect_count})...")
|
||||
await self.connect()
|
||||
print(f"✅ Reconnected successfully!")
|
||||
except Exception as e:
|
||||
print(f"⚠️ Failed to reconnect: {e}")
|
||||
self.enabled = False # Disable logging after failed reconnect
|
||||
return
|
||||
|
||||
try:
|
||||
# Create standardized message structure
|
||||
# All events follow this format for consistent server-side handling
|
||||
message = {
|
||||
"session_id": self.session_id, # Links event to specific agent session
|
||||
"event_type": event_type, # Identifies what kind of event this is
|
||||
"data": data # Event-specific payload
|
||||
}
|
||||
|
||||
# Send message as JSON string over WebSocket
|
||||
await self.websocket.send(json.dumps(message))
|
||||
|
||||
# Wait for server acknowledgment (with 1 second timeout)
|
||||
# This ensures the server received and processed the event
|
||||
try:
|
||||
response = await asyncio.wait_for(
|
||||
self.websocket.recv(),
|
||||
timeout=1.0
|
||||
)
|
||||
# Server sends back: {"status": "logged", "session_id": "...", "event_type": "..."}
|
||||
# We don't need to process it, just confirms receipt
|
||||
except asyncio.TimeoutError:
|
||||
# No response within 1 second - that's okay, continue anyway
|
||||
# Server might be busy or network slow, but event was likely sent
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
# Log error but don't crash - graceful degradation
|
||||
# Agent should continue working even if logging fails
|
||||
error_str = str(e)
|
||||
|
||||
# Check if connection was closed (error 1011 = keepalive ping timeout)
|
||||
if "1011" in error_str or "closed" in error_str.lower():
|
||||
print(f"⚠️ WebSocket connection closed: {error_str}")
|
||||
self.connected = False # Mark as disconnected
|
||||
# Don't try to send more events - connection is dead
|
||||
else:
|
||||
print(f"⚠️ Error sending event to logging server: {e}")
|
||||
# Don't disable entirely or try to reconnect - just continue with logging disabled
|
||||
|
||||
# Convenience methods for specific event types
|
||||
|
||||
async def log_query(
|
||||
self,
|
||||
query: str,
|
||||
model: str = None,
|
||||
toolsets: list = None
|
||||
):
|
||||
"""
|
||||
Log a user query (the question/task given to the agent).
|
||||
|
||||
This is typically the first event in a session after connection.
|
||||
Captures what the user asked and which model/tools will be used.
|
||||
"""
|
||||
await self._send_event("query", {
|
||||
"query": query, # The user's question/instruction
|
||||
"model": model, # Which AI model is being used
|
||||
"toolsets": toolsets # Which tool categories are enabled
|
||||
})
|
||||
|
||||
async def log_api_call(
|
||||
self,
|
||||
call_number: int,
|
||||
message_count: int = None,
|
||||
has_tools: bool = None
|
||||
):
|
||||
"""
|
||||
Log an API call to the AI model.
|
||||
|
||||
Called right before sending a request to the model (OpenAI/Anthropic/etc).
|
||||
Helps track how many API calls are being made and conversation length.
|
||||
"""
|
||||
await self._send_event("api_call", {
|
||||
"call_number": call_number, # Sequential number (1, 2, 3...)
|
||||
"message_count": message_count, # How many messages in conversation so far
|
||||
"has_tools": has_tools # Whether tools are available to the model
|
||||
})
|
||||
|
||||
async def log_response(
|
||||
self,
|
||||
call_number: int,
|
||||
content: str = None,
|
||||
has_tool_calls: bool = False,
|
||||
tool_call_count: int = 0,
|
||||
duration: float = None
|
||||
):
|
||||
"""
|
||||
Log an assistant response from the AI model.
|
||||
|
||||
Called after receiving a response from the API.
|
||||
Captures what the model said and whether it wants to use tools.
|
||||
"""
|
||||
await self._send_event("response", {
|
||||
"call_number": call_number, # Which API call this response is from
|
||||
"content": content, # What the model said (text response)
|
||||
"has_tool_calls": has_tool_calls, # Did model request tool execution?
|
||||
"tool_call_count": tool_call_count, # How many tools does it want to call?
|
||||
"duration": duration # How long the API call took (seconds)
|
||||
})
|
||||
|
||||
async def log_tool_call(
|
||||
self,
|
||||
call_number: int,
|
||||
tool_index: int,
|
||||
tool_name: str,
|
||||
parameters: Dict[str, Any],
|
||||
tool_call_id: str = None
|
||||
):
|
||||
"""
|
||||
Log a tool call (before executing the tool).
|
||||
|
||||
Captures which tool is being called and with what parameters.
|
||||
This happens BEFORE the tool runs, so no results yet.
|
||||
"""
|
||||
await self._send_event("tool_call", {
|
||||
"call_number": call_number, # Which API call requested this tool
|
||||
"tool_index": tool_index, # Which tool in the sequence (if multiple)
|
||||
"tool_name": tool_name, # Name of tool (e.g., "web_search", "web_extract")
|
||||
"parameters": parameters, # Arguments passed to the tool (e.g., {"query": "Python", "limit": 5})
|
||||
"tool_call_id": tool_call_id # Unique ID to link call with result
|
||||
})
|
||||
|
||||
async def log_tool_result(
|
||||
self,
|
||||
call_number: int,
|
||||
tool_index: int,
|
||||
tool_name: str,
|
||||
result: str = None,
|
||||
error: str = None,
|
||||
duration: float = None,
|
||||
tool_call_id: str = None,
|
||||
raw_result: str = None # NEW: Full untruncated result for verification
|
||||
):
|
||||
"""
|
||||
Log a tool result (output from tool execution).
|
||||
|
||||
Captures both a truncated preview (for UI display) and the full raw result
|
||||
(for verification and debugging). This is especially important for web tools
|
||||
where you want to see what was scraped vs what the LLM processed.
|
||||
|
||||
Args:
|
||||
call_number: Which API call this tool was part of
|
||||
tool_index: Which tool in the sequence (1st, 2nd, etc.)
|
||||
tool_name: Name of the tool that was executed
|
||||
result: Tool output (will be truncated to 1000 chars for preview)
|
||||
error: Error message if tool failed
|
||||
duration: How long the tool took to execute (seconds)
|
||||
tool_call_id: Unique ID linking this result to the tool call
|
||||
raw_result: NEW - Full untruncated result for verification/debugging
|
||||
"""
|
||||
await self._send_event("tool_result", {
|
||||
"call_number": call_number,
|
||||
"tool_index": tool_index,
|
||||
"tool_name": tool_name,
|
||||
"result": result[:1000] if result else None, # Truncated preview (1000 chars max)
|
||||
"raw_result": raw_result, # NEW: Full result - can be 100KB+ for web scraping
|
||||
"error": error,
|
||||
"duration": duration,
|
||||
"tool_call_id": tool_call_id
|
||||
})
|
||||
|
||||
async def log_error(
|
||||
self,
|
||||
error_message: str,
|
||||
call_number: int = None
|
||||
):
|
||||
"""
|
||||
Log an error that occurred during agent execution.
|
||||
|
||||
Captures exceptions, API failures, or other issues.
|
||||
"""
|
||||
await self._send_event("error", {
|
||||
"error_message": error_message, # Description of what went wrong
|
||||
"call_number": call_number # Which API call caused the error (if applicable)
|
||||
})
|
||||
|
||||
async def log_complete(
|
||||
self,
|
||||
final_response: str = None,
|
||||
total_calls: int = None,
|
||||
completed: bool = True
|
||||
):
|
||||
"""
|
||||
Log session completion (final event before disconnecting).
|
||||
|
||||
Marks the end of the agent's execution and provides summary info.
|
||||
"""
|
||||
await self._send_event("complete", {
|
||||
"final_response": final_response[:500] if final_response else None, # Truncated summary of final answer
|
||||
"total_calls": total_calls, # How many API calls were made total
|
||||
"completed": completed # Did it complete successfully? (true/false)
|
||||
})
|
||||
|
||||
|
||||
# Synchronous wrapper for convenience
|
||||
class SyncWebSocketLogger:
|
||||
"""
|
||||
Synchronous wrapper around WebSocketLogger.
|
||||
|
||||
For use in synchronous code - creates an event loop internally.
|
||||
"""
|
||||
|
||||
def __init__(self, session_id: str, server_url: str = "ws://localhost:8000/ws", enabled: bool = True):
|
||||
self.logger = WebSocketLogger(session_id, server_url, enabled)
|
||||
self.loop = None
|
||||
|
||||
def connect(self):
|
||||
"""Connect to server (synchronous)."""
|
||||
self.loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(self.loop)
|
||||
self.loop.run_until_complete(self.logger.connect())
|
||||
|
||||
def disconnect(self):
|
||||
"""Disconnect from server (synchronous)."""
|
||||
if self.loop:
|
||||
self.loop.run_until_complete(self.logger.disconnect())
|
||||
self.loop.close()
|
||||
|
||||
def _run_async(self, coro):
|
||||
"""
|
||||
Run an async coroutine synchronously.
|
||||
|
||||
Bridge between sync code (agent) and async code (WebSocket).
|
||||
Uses event loop to execute async operations in sync context.
|
||||
"""
|
||||
if self.loop and self.loop.is_running():
|
||||
# Already in event loop, just await
|
||||
asyncio.create_task(coro)
|
||||
else:
|
||||
# Run in current loop
|
||||
if self.loop:
|
||||
self.loop.run_until_complete(coro)
|
||||
|
||||
def log_query(self, query: str, model: str = None, toolsets: list = None):
|
||||
self._run_async(self.logger.log_query(query, model, toolsets))
|
||||
|
||||
def log_api_call(self, call_number: int, message_count: int = None, has_tools: bool = None):
|
||||
self._run_async(self.logger.log_api_call(call_number, message_count, has_tools))
|
||||
|
||||
def log_response(self, call_number: int, content: str = None, has_tool_calls: bool = False,
|
||||
tool_call_count: int = 0, duration: float = None):
|
||||
self._run_async(self.logger.log_response(call_number, content, has_tool_calls,
|
||||
tool_call_count, duration))
|
||||
|
||||
def log_tool_call(self, call_number: int, tool_index: int, tool_name: str,
|
||||
parameters: Dict[str, Any], tool_call_id: str = None):
|
||||
self._run_async(self.logger.log_tool_call(call_number, tool_index, tool_name,
|
||||
parameters, tool_call_id))
|
||||
|
||||
def log_tool_result(self, call_number: int, tool_index: int, tool_name: str,
|
||||
result: str = None, error: str = None, duration: float = None,
|
||||
tool_call_id: str = None, raw_result: str = None):
|
||||
self._run_async(self.logger.log_tool_result(call_number, tool_index, tool_name,
|
||||
result, error, duration, tool_call_id, raw_result))
|
||||
|
||||
def log_error(self, error_message: str, call_number: int = None):
|
||||
self._run_async(self.logger.log_error(error_message, call_number))
|
||||
|
||||
def log_complete(self, final_response: str = None, total_calls: int = None, completed: bool = True):
|
||||
self._run_async(self.logger.log_complete(final_response, total_calls, completed))
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue