mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-26 01:01:40 +00:00
changes
This commit is contained in:
parent
c5386ed7e6
commit
e698b7e0e5
19 changed files with 3924 additions and 132 deletions
26
api_endpoint/__init__.py
Normal file
26
api_endpoint/__init__.py
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
"""
|
||||
Hermes Agent - API Endpoint & Real-time Logging
|
||||
|
||||
This package provides a FastAPI WebSocket endpoint for real-time logging of the Hermes Agent.
|
||||
|
||||
Components:
|
||||
- logging_server: FastAPI server that receives and stores events
|
||||
- websocket_logger: Client library for sending events from the agent
|
||||
|
||||
Usage:
|
||||
# Start the API endpoint server
|
||||
python api_endpoint/logging_server.py
|
||||
|
||||
# Use in agent code
|
||||
from api_endpoint.websocket_logger import WebSocketLogger
|
||||
|
||||
For more information, see:
|
||||
- WEBSOCKET_LOGGING_GUIDE.md - User guide
|
||||
- IMPLEMENTATION_SUMMARY.md - Technical details
|
||||
"""
|
||||
|
||||
from .websocket_logger import WebSocketLogger, SyncWebSocketLogger
|
||||
|
||||
__all__ = ['WebSocketLogger', 'SyncWebSocketLogger']
|
||||
__version__ = '1.0.0'
|
||||
|
||||
597
api_endpoint/logging_server.py
Normal file
597
api_endpoint/logging_server.py
Normal file
|
|
@ -0,0 +1,597 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Hermes Agent - Real-time Logging Server
|
||||
|
||||
A FastAPI server with WebSocket support that listens for agent execution events
|
||||
and logs them to JSON files in real-time.
|
||||
|
||||
Events tracked:
|
||||
- User queries
|
||||
- API calls (requests to the model)
|
||||
- Assistant responses
|
||||
- Tool calls (name, parameters, timing)
|
||||
- Tool results (outputs, errors, duration)
|
||||
- Final responses
|
||||
- Session metadata
|
||||
|
||||
Usage:
|
||||
python logging_server.py
|
||||
|
||||
Or with uvicorn directly:
|
||||
uvicorn logging_server:app --host 0.0.0.0 --port 8000 --reload
|
||||
|
||||
The server will listen for WebSocket connections at ws://localhost:8000/ws
|
||||
"""
|
||||
|
||||
import json
|
||||
import asyncio
|
||||
import threading
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, List, Optional
|
||||
from fastapi import FastAPI, WebSocket, WebSocketDisconnect, BackgroundTasks
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from pydantic import BaseModel
|
||||
import uvicorn
|
||||
|
||||
# Configuration
|
||||
LOGS_DIR = Path(__file__).parent / "logs" / "realtime"
|
||||
LOGS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Initialize FastAPI app
|
||||
app = FastAPI(
|
||||
title="Hermes Agent Logging Server",
|
||||
description="Real-time WebSocket server for agent execution logging",
|
||||
version="1.0.0"
|
||||
)
|
||||
|
||||
# Add CORS middleware
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
|
||||
class SessionLogger:
|
||||
"""
|
||||
Manages logging for a single agent session.
|
||||
|
||||
Each agent execution gets its own SessionLogger instance.
|
||||
Responsible for:
|
||||
- Collecting all events for the session
|
||||
- Saving events to JSON file in real-time
|
||||
- Managing session lifecycle (start -> events -> finalize)
|
||||
"""
|
||||
|
||||
def __init__(self, session_id: str):
|
||||
self.session_id = session_id
|
||||
self.start_time = datetime.now()
|
||||
self.events: List[Dict[str, Any]] = [] # In-memory list of all events
|
||||
self.log_file = LOGS_DIR / f"session_{session_id}.json" # Where to save on disk
|
||||
|
||||
# Initialize session data structure
|
||||
# This is what gets saved to the JSON file
|
||||
self.session_data = {
|
||||
"session_id": session_id,
|
||||
"start_time": self.start_time.isoformat(),
|
||||
"end_time": None, # Set when session completes
|
||||
"events": [], # Will be populated as events come in
|
||||
"metadata": {} # Model, toolsets, etc. (set via session_start event)
|
||||
}
|
||||
|
||||
def add_event(self, event: Dict[str, Any]):
|
||||
"""
|
||||
Add an event to the session log.
|
||||
|
||||
Called every time a new event arrives (query, api_call, tool_call, etc).
|
||||
IMMEDIATELY saves to file for real-time persistence.
|
||||
"""
|
||||
# Add timestamp if not present (should always be added, but safety check)
|
||||
if "timestamp" not in event:
|
||||
event["timestamp"] = datetime.now().isoformat()
|
||||
|
||||
# Add to in-memory event list
|
||||
self.events.append(event)
|
||||
self.session_data["events"] = self.events
|
||||
|
||||
# CRITICAL: Save to file immediately (real-time logging)
|
||||
# This ensures events are persisted even if agent crashes
|
||||
self._save()
|
||||
|
||||
def set_metadata(self, metadata: Dict[str, Any]):
|
||||
"""Set session metadata (model, toolsets, etc.)."""
|
||||
self.session_data["metadata"].update(metadata)
|
||||
self._save()
|
||||
|
||||
def finalize(self):
|
||||
"""Finalize the session and save."""
|
||||
self.session_data["end_time"] = datetime.now().isoformat()
|
||||
self._save()
|
||||
|
||||
def _save(self):
|
||||
"""
|
||||
Save current session data to JSON file.
|
||||
|
||||
Called after EVERY event is added - provides real-time persistence.
|
||||
If file writing fails, logs error but continues (doesn't crash server).
|
||||
"""
|
||||
try:
|
||||
# Write complete session data to JSON file
|
||||
# indent=2 makes it human-readable
|
||||
# ensure_ascii=False preserves Unicode characters
|
||||
with open(self.log_file, 'w', encoding='utf-8') as f:
|
||||
json.dump(self.session_data, f, indent=2, ensure_ascii=False)
|
||||
except Exception as e:
|
||||
print(f"❌ Error saving session log: {e}")
|
||||
|
||||
|
||||
class ConnectionManager:
|
||||
"""
|
||||
Manages WebSocket connections and active sessions.
|
||||
|
||||
Global singleton that:
|
||||
- Tracks all active WebSocket connections (for broadcasting)
|
||||
- Manages all SessionLogger instances (one per agent session)
|
||||
- Coordinates between WebSocket events and file logging
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.active_connections: List[WebSocket] = [] # All connected WebSocket clients
|
||||
self.sessions: Dict[str, SessionLogger] = {} # session_id -> SessionLogger mapping
|
||||
|
||||
async def connect(self, websocket: WebSocket):
|
||||
"""Accept a new WebSocket connection."""
|
||||
await websocket.accept()
|
||||
self.active_connections.append(websocket)
|
||||
print(f"✅ WebSocket connected. Active connections: {len(self.active_connections)}")
|
||||
|
||||
def disconnect(self, websocket: WebSocket):
|
||||
"""Remove a WebSocket connection."""
|
||||
if websocket in self.active_connections:
|
||||
self.active_connections.remove(websocket)
|
||||
print(f"❌ WebSocket disconnected. Active connections: {len(self.active_connections)}")
|
||||
|
||||
def get_or_create_session(self, session_id: str) -> SessionLogger:
|
||||
"""
|
||||
Get existing session logger or create a new one.
|
||||
|
||||
Called when an event arrives for a session. Creates SessionLogger
|
||||
on first event, reuses it for subsequent events from same session.
|
||||
"""
|
||||
if session_id not in self.sessions:
|
||||
# First time seeing this session - create new logger
|
||||
self.sessions[session_id] = SessionLogger(session_id)
|
||||
print(f"📝 Created new session: {session_id}")
|
||||
return self.sessions[session_id]
|
||||
|
||||
def finalize_session(self, session_id: str):
|
||||
"""Finalize and clean up a session."""
|
||||
if session_id in self.sessions:
|
||||
self.sessions[session_id].finalize()
|
||||
print(f"✅ Session finalized: {session_id}")
|
||||
|
||||
async def broadcast(self, message: Dict[str, Any]):
|
||||
"""
|
||||
Broadcast a message to all connected WebSocket clients.
|
||||
|
||||
Allows multiple clients (e.g., multiple browser tabs) to watch
|
||||
the same agent session in real-time. Future UI feature.
|
||||
"""
|
||||
disconnected = []
|
||||
for connection in self.active_connections:
|
||||
try:
|
||||
await connection.send_json(message)
|
||||
except Exception:
|
||||
# Connection closed - mark for removal
|
||||
disconnected.append(connection)
|
||||
|
||||
# Clean up disconnected clients silently
|
||||
for conn in disconnected:
|
||||
if conn in self.active_connections:
|
||||
self.active_connections.remove(conn)
|
||||
|
||||
|
||||
# Global connection manager
|
||||
manager = ConnectionManager()
|
||||
|
||||
|
||||
# Request/Response models for API endpoints
|
||||
class AgentRequest(BaseModel):
|
||||
"""Request model for starting an agent run."""
|
||||
query: str
|
||||
model: str = "claude-sonnet-4-5-20250929"
|
||||
base_url: str = "https://api.anthropic.com/v1/"
|
||||
enabled_toolsets: Optional[List[str]] = None
|
||||
disabled_toolsets: Optional[List[str]] = None
|
||||
max_turns: int = 10
|
||||
mock_web_tools: bool = False
|
||||
mock_delay: int = 60
|
||||
verbose: bool = False
|
||||
|
||||
|
||||
class AgentResponse(BaseModel):
|
||||
"""Response model for agent run request."""
|
||||
status: str
|
||||
session_id: str
|
||||
message: str
|
||||
|
||||
|
||||
@app.get("/")
|
||||
async def root():
|
||||
"""Root endpoint - server status."""
|
||||
return {
|
||||
"status": "running",
|
||||
"service": "Hermes Agent Logging Server",
|
||||
"websocket_url": "ws://localhost:8000/ws",
|
||||
"active_connections": len(manager.active_connections),
|
||||
"active_sessions": len(manager.sessions),
|
||||
"logs_directory": str(LOGS_DIR)
|
||||
}
|
||||
|
||||
|
||||
@app.get("/sessions")
|
||||
async def list_sessions():
|
||||
"""List all active and recent sessions."""
|
||||
# Get all session log files
|
||||
session_files = list(LOGS_DIR.glob("session_*.json"))
|
||||
|
||||
sessions = []
|
||||
for session_file in sorted(session_files, key=lambda x: x.stat().st_mtime, reverse=True):
|
||||
try:
|
||||
with open(session_file, 'r', encoding='utf-8') as f:
|
||||
session_data = json.load(f)
|
||||
sessions.append({
|
||||
"session_id": session_data.get("session_id"),
|
||||
"start_time": session_data.get("start_time"),
|
||||
"end_time": session_data.get("end_time"),
|
||||
"event_count": len(session_data.get("events", [])),
|
||||
"file": str(session_file)
|
||||
})
|
||||
except Exception as e:
|
||||
print(f"⚠️ Error reading session file {session_file}: {e}")
|
||||
|
||||
return {
|
||||
"total_sessions": len(sessions),
|
||||
"sessions": sessions
|
||||
}
|
||||
|
||||
|
||||
@app.get("/sessions/{session_id}")
|
||||
async def get_session(session_id: str):
|
||||
"""Get detailed data for a specific session."""
|
||||
session_file = LOGS_DIR / f"session_{session_id}.json"
|
||||
|
||||
if not session_file.exists():
|
||||
return {"error": "Session not found"}, 404
|
||||
|
||||
try:
|
||||
with open(session_file, 'r', encoding='utf-8') as f:
|
||||
return json.load(f)
|
||||
except Exception as e:
|
||||
return {"error": f"Failed to load session: {str(e)}"}, 500
|
||||
|
||||
|
||||
@app.post("/agent/run", response_model=AgentResponse)
|
||||
async def run_agent(request: AgentRequest, background_tasks: BackgroundTasks):
|
||||
"""
|
||||
Start an agent run with specified parameters.
|
||||
|
||||
This endpoint triggers an agent execution in the background and returns immediately.
|
||||
The agent will connect to the WebSocket endpoint to send real-time events.
|
||||
|
||||
Args:
|
||||
request: AgentRequest with query and configuration
|
||||
background_tasks: FastAPI background tasks for async execution
|
||||
|
||||
Returns:
|
||||
AgentResponse with session_id for tracking
|
||||
"""
|
||||
import uuid
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Generate session ID for this run - we'll pass it to the agent
|
||||
session_id = str(uuid.uuid4())
|
||||
|
||||
# Add parent directory to path to import run_agent
|
||||
parent_dir = str(Path(__file__).parent.parent)
|
||||
if parent_dir not in sys.path:
|
||||
sys.path.insert(0, parent_dir)
|
||||
|
||||
from run_agent import AIAgent
|
||||
|
||||
# Run agent in background thread (not blocking the API)
|
||||
def run_agent_background():
|
||||
"""Run agent in a separate thread."""
|
||||
try:
|
||||
# Initialize agent with WebSocket logging enabled
|
||||
agent = AIAgent(
|
||||
base_url=request.base_url,
|
||||
model=request.model,
|
||||
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
||||
max_iterations=request.max_turns,
|
||||
enabled_toolsets=request.enabled_toolsets,
|
||||
disabled_toolsets=request.disabled_toolsets,
|
||||
save_trajectories=False,
|
||||
verbose_logging=request.verbose,
|
||||
enable_websocket_logging=True, # Always enable for UI
|
||||
websocket_server="ws://localhost:8000/ws",
|
||||
mock_web_tools=request.mock_web_tools,
|
||||
mock_delay=request.mock_delay
|
||||
)
|
||||
|
||||
# Run conversation with our session_id
|
||||
result = agent.run_conversation(
|
||||
request.query,
|
||||
session_id=session_id # Pass session_id so it matches
|
||||
)
|
||||
|
||||
print(f"✅ Agent run completed: {session_id[:8]}...")
|
||||
print(f" Final response: {result['final_response'][:100] if result.get('final_response') else 'No response'}...")
|
||||
|
||||
except Exception as e:
|
||||
print(f"❌ Error running agent {session_id[:8]}...: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
|
||||
# Start agent in background thread
|
||||
thread = threading.Thread(target=run_agent_background, daemon=True)
|
||||
thread.start()
|
||||
|
||||
return AgentResponse(
|
||||
status="started",
|
||||
session_id=session_id,
|
||||
message=f"Agent started with session ID: {session_id}"
|
||||
)
|
||||
|
||||
|
||||
@app.get("/tools")
|
||||
async def get_available_tools():
|
||||
"""Get list of available toolsets and tools."""
|
||||
try:
|
||||
import sys
|
||||
parent_dir = str(Path(__file__).parent.parent)
|
||||
if parent_dir not in sys.path:
|
||||
sys.path.insert(0, parent_dir)
|
||||
|
||||
from toolsets import get_all_toolsets, get_toolset_info
|
||||
|
||||
all_toolsets = get_all_toolsets()
|
||||
toolsets_info = []
|
||||
|
||||
for name in all_toolsets.keys():
|
||||
info = get_toolset_info(name)
|
||||
if info:
|
||||
toolsets_info.append({
|
||||
"name": name,
|
||||
"description": info['description'],
|
||||
"tool_count": info['tool_count'],
|
||||
"resolved_tools": info['resolved_tools']
|
||||
})
|
||||
|
||||
return {
|
||||
"toolsets": toolsets_info
|
||||
}
|
||||
except Exception as e:
|
||||
return {"error": f"Failed to load tools: {str(e)}"}
|
||||
|
||||
|
||||
@app.websocket("/ws")
|
||||
async def websocket_endpoint(websocket: WebSocket):
|
||||
"""
|
||||
WebSocket endpoint for receiving real-time agent events.
|
||||
|
||||
This is the main entry point for all logging. Agents connect here and send events.
|
||||
|
||||
Message Flow:
|
||||
1. Agent connects to ws://localhost:8000/ws
|
||||
2. Agent sends events as JSON messages
|
||||
3. Server parses event_type and routes to appropriate handler
|
||||
4. Event is added to SessionLogger (saved to file)
|
||||
5. Event is broadcast to all connected clients
|
||||
6. Acknowledgment sent back to agent
|
||||
|
||||
Expected message format:
|
||||
{
|
||||
"session_id": "unique-session-id", // Links event to specific session
|
||||
"event_type": "query" | "api_call" | ..., // What kind of event
|
||||
"data": { ... event-specific data ... } // Event payload
|
||||
}
|
||||
"""
|
||||
# Accept the WebSocket connection
|
||||
await manager.connect(websocket)
|
||||
|
||||
try:
|
||||
# Main event loop - runs until client disconnects
|
||||
while True:
|
||||
# Receive message from client (agent)
|
||||
# This is a blocking call - waits for next message
|
||||
message = await websocket.receive_json()
|
||||
|
||||
# Parse the standard message structure
|
||||
session_id = message.get("session_id") # Which agent session
|
||||
event_type = message.get("event_type") # What kind of event
|
||||
data = message.get("data", {}) # Event payload
|
||||
|
||||
# Validate: session_id is required
|
||||
if not session_id:
|
||||
await websocket.send_json({
|
||||
"error": "session_id is required"
|
||||
})
|
||||
continue
|
||||
|
||||
# Get or create SessionLogger for this session
|
||||
# First event creates it, subsequent events reuse it
|
||||
session = manager.get_or_create_session(session_id)
|
||||
|
||||
# Route event to appropriate handler based on event_type
|
||||
# Each handler extracts relevant data and adds to session log
|
||||
|
||||
if event_type == "session_start":
|
||||
# Initial event - sent when agent first connects
|
||||
# Contains metadata about the session (model, toolsets, etc.)
|
||||
session.set_metadata(data)
|
||||
print(f"🚀 Session started: {session_id}")
|
||||
|
||||
elif event_type == "query":
|
||||
# User query
|
||||
session.add_event({
|
||||
"type": "query",
|
||||
"query": data.get("query"),
|
||||
"toolsets": data.get("toolsets"),
|
||||
"model": data.get("model")
|
||||
})
|
||||
print(f"📝 Query logged: {data.get('query', '')[:60]}...")
|
||||
|
||||
elif event_type == "api_call":
|
||||
# API call to model
|
||||
session.add_event({
|
||||
"type": "api_call",
|
||||
"call_number": data.get("call_number"),
|
||||
"message_count": data.get("message_count"),
|
||||
"has_tools": data.get("has_tools")
|
||||
})
|
||||
print(f"🔄 API call #{data.get('call_number')} logged")
|
||||
|
||||
elif event_type == "response":
|
||||
# Assistant response
|
||||
session.add_event({
|
||||
"type": "response",
|
||||
"call_number": data.get("call_number"),
|
||||
"content": data.get("content"),
|
||||
"has_tool_calls": data.get("has_tool_calls"),
|
||||
"tool_call_count": data.get("tool_call_count"),
|
||||
"duration": data.get("duration")
|
||||
})
|
||||
print(f"🤖 Response logged: {data.get('content', '')[:60]}...")
|
||||
|
||||
elif event_type == "tool_call":
|
||||
# Tool execution
|
||||
session.add_event({
|
||||
"type": "tool_call",
|
||||
"call_number": data.get("call_number"),
|
||||
"tool_index": data.get("tool_index"),
|
||||
"tool_name": data.get("tool_name"),
|
||||
"parameters": data.get("parameters"),
|
||||
"tool_call_id": data.get("tool_call_id")
|
||||
})
|
||||
print(f"🔧 Tool call logged: {data.get('tool_name')}")
|
||||
|
||||
elif event_type == "tool_result":
|
||||
# Tool result - captures output from tool execution
|
||||
# Now includes BOTH truncated preview AND full raw result
|
||||
session.add_event({
|
||||
"type": "tool_result",
|
||||
"call_number": data.get("call_number"),
|
||||
"tool_index": data.get("tool_index"),
|
||||
"tool_name": data.get("tool_name"),
|
||||
"result": data.get("result"), # Truncated preview (1000 chars)
|
||||
"raw_result": data.get("raw_result"), # NEW: Full untruncated result
|
||||
"error": data.get("error"),
|
||||
"duration": data.get("duration"),
|
||||
"tool_call_id": data.get("tool_call_id")
|
||||
})
|
||||
|
||||
# Enhanced logging with size information
|
||||
if data.get("error"):
|
||||
print(f"❌ Tool error logged: {data.get('tool_name')}")
|
||||
else:
|
||||
# Show size of raw result to indicate data volume
|
||||
raw_size = len(data.get("raw_result", "")) if data.get("raw_result") else len(data.get("result", ""))
|
||||
size_kb = raw_size / 1024
|
||||
print(f"✅ Tool result logged: {data.get('tool_name')} ({size_kb:.1f} KB)")
|
||||
|
||||
elif event_type == "error":
|
||||
# Error event
|
||||
session.add_event({
|
||||
"type": "error",
|
||||
"error_message": data.get("error_message"),
|
||||
"call_number": data.get("call_number")
|
||||
})
|
||||
print(f"❌ Error logged: {data.get('error_message', '')[:60]}...")
|
||||
|
||||
elif event_type == "complete":
|
||||
# Session complete
|
||||
session.add_event({
|
||||
"type": "complete",
|
||||
"final_response": data.get("final_response"),
|
||||
"total_calls": data.get("total_calls"),
|
||||
"completed": data.get("completed")
|
||||
})
|
||||
manager.finalize_session(session_id)
|
||||
print(f"🎉 Session complete: {session_id}")
|
||||
|
||||
else:
|
||||
# Unknown event type - log it anyway
|
||||
session.add_event({
|
||||
"type": event_type or "unknown",
|
||||
**data
|
||||
})
|
||||
print(f"⚠️ Unknown event type: {event_type}")
|
||||
|
||||
# Broadcast event to all connected clients (for future real-time UI)
|
||||
# Allows multiple browsers/dashboards to watch same session live
|
||||
await manager.broadcast({
|
||||
"session_id": session_id,
|
||||
"event_type": event_type,
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
"data": data
|
||||
})
|
||||
|
||||
# Send acknowledgment back to sender
|
||||
# Confirms event was received and logged
|
||||
# Handle case where client disconnects before we can ack
|
||||
try:
|
||||
await websocket.send_json({
|
||||
"status": "logged",
|
||||
"session_id": session_id,
|
||||
"event_type": event_type
|
||||
})
|
||||
except Exception:
|
||||
# Connection closed before ack - this is normal for "complete" event
|
||||
# Client disconnects after sending, so we can't ack
|
||||
pass
|
||||
|
||||
except WebSocketDisconnect:
|
||||
manager.disconnect(websocket)
|
||||
except Exception as e:
|
||||
print(f"❌ WebSocket error: {e}")
|
||||
manager.disconnect(websocket)
|
||||
|
||||
|
||||
def main(host: str = "0.0.0.0", port: int = 8000, reload: bool = False):
|
||||
"""
|
||||
Start the logging server.
|
||||
|
||||
Args:
|
||||
host: Host to bind to (default: 0.0.0.0)
|
||||
port: Port to run on (default: 8000)
|
||||
reload: Enable auto-reload on file changes (default: False)
|
||||
"""
|
||||
print("🚀 Hermes Agent Logging Server")
|
||||
print("=" * 50)
|
||||
print(f"📂 Logs directory: {LOGS_DIR}")
|
||||
print(f"🌐 Server starting at http://{host}:{port}")
|
||||
print(f"🔌 WebSocket endpoint: ws://{host}:{port}/ws")
|
||||
print(f"🔄 Auto-reload: {'enabled' if reload else 'disabled'}")
|
||||
print("\n📡 Ready to receive agent events...")
|
||||
print("=" * 50)
|
||||
|
||||
uvicorn.run(
|
||||
"logging_server:app",
|
||||
host=host,
|
||||
port=port,
|
||||
reload=reload,
|
||||
log_level="info",
|
||||
timeout_keep_alive=600 # Keep HTTP/WS connections alive for 10 minutes of inactivity
|
||||
# Note: WebSocket ping/pong disabled in client to avoid timeout during blocked event loop
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import fire
|
||||
fire.Fire(main)
|
||||
|
||||
91
api_endpoint/test_websocket_logging.sh
Executable file
91
api_endpoint/test_websocket_logging.sh
Executable file
|
|
@ -0,0 +1,91 @@
|
|||
#!/bin/bash
|
||||
# Test script for WebSocket logging system
|
||||
#
|
||||
# This script demonstrates the complete WebSocket logging workflow:
|
||||
# 1. Starts the logging server
|
||||
# 2. Runs the agent with WebSocket logging enabled
|
||||
# 3. Shows the logged data
|
||||
#
|
||||
# Usage: ./test_websocket_logging.sh
|
||||
|
||||
set -e # Exit on error
|
||||
|
||||
echo "🧪 Testing WebSocket Logging System"
|
||||
echo "===================================="
|
||||
echo ""
|
||||
|
||||
# Check if required packages are installed
|
||||
echo "📦 Checking dependencies..."
|
||||
python -c "import fastapi; import uvicorn; import websockets" 2>/dev/null || {
|
||||
echo "❌ Missing dependencies. Installing..."
|
||||
pip install fastapi uvicorn websockets
|
||||
}
|
||||
echo "✅ Dependencies OK"
|
||||
echo ""
|
||||
|
||||
# Start the logging server in the background
|
||||
echo "🚀 Starting logging server..."
|
||||
python api_endpoint/logging_server.py --port 8000 &
|
||||
SERVER_PID=$!
|
||||
|
||||
# Give server time to start
|
||||
sleep 2
|
||||
|
||||
# Check if server is running
|
||||
if ps -p $SERVER_PID > /dev/null; then
|
||||
echo "✅ Logging server started (PID: $SERVER_PID)"
|
||||
else
|
||||
echo "❌ Failed to start logging server"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "🤖 Running agent with WebSocket logging..."
|
||||
echo ""
|
||||
|
||||
# Run the agent with WebSocket logging
|
||||
python run_agent.py \
|
||||
--enabled_toolsets web \
|
||||
--enable_websocket_logging \
|
||||
--query "What are the top 3 programming languages in 2025?" \
|
||||
--max_turns 5
|
||||
|
||||
echo ""
|
||||
echo "✅ Agent execution complete!"
|
||||
echo ""
|
||||
|
||||
# Show the most recent log file
|
||||
echo "📊 Viewing logged session data..."
|
||||
echo ""
|
||||
|
||||
LATEST_LOG=$(ls -t logs/realtime/session_*.json 2>/dev/null | head -1)
|
||||
|
||||
if [ -f "$LATEST_LOG" ]; then
|
||||
echo "📄 Log file: $LATEST_LOG"
|
||||
echo ""
|
||||
|
||||
# Pretty print the JSON if jq is available
|
||||
if command -v jq &> /dev/null; then
|
||||
echo "Event summary:"
|
||||
jq '.events[] | {type: .type, timestamp: .timestamp}' "$LATEST_LOG"
|
||||
echo ""
|
||||
echo "Total events: $(jq '.events | length' "$LATEST_LOG")"
|
||||
else
|
||||
echo "Content (install 'jq' for pretty printing):"
|
||||
cat "$LATEST_LOG"
|
||||
fi
|
||||
else
|
||||
echo "⚠️ No log files found in logs/realtime/"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "🛑 Stopping logging server..."
|
||||
kill $SERVER_PID 2>/dev/null || true
|
||||
|
||||
echo "✅ Test complete!"
|
||||
echo ""
|
||||
echo "Next steps:"
|
||||
echo " 1. Start server: python api_endpoint/logging_server.py"
|
||||
echo " 2. Run agent: python run_agent.py --enable_websocket_logging --query \"...\""
|
||||
echo " 3. View logs: http://localhost:8000/sessions"
|
||||
|
||||
382
api_endpoint/websocket_connection_pool.py
Normal file
382
api_endpoint/websocket_connection_pool.py
Normal file
|
|
@ -0,0 +1,382 @@
|
|||
"""
|
||||
WebSocket Connection Pool - Persistent Connection Manager
|
||||
|
||||
This module provides a singleton WebSocket connection that persists across
|
||||
multiple agent runs. This is a more robust architecture than creating a new
|
||||
connection for each run.
|
||||
|
||||
Benefits:
|
||||
- No timeout issues (connection stays alive indefinitely)
|
||||
- No reconnection overhead (connect once)
|
||||
- Supports parallel agent runs (multiple sessions share one socket)
|
||||
- Proper shutdown handling (SIGTERM/SIGINT)
|
||||
- Thread-safe concurrent sends
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import signal
|
||||
import websockets
|
||||
from typing import Optional, Dict, Any
|
||||
import json
|
||||
import atexit
|
||||
import sys
|
||||
import threading
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class WebSocketConnectionPool:
|
||||
"""
|
||||
Singleton WebSocket connection manager.
|
||||
|
||||
Maintains a single persistent connection to the logging server
|
||||
that all agent sessions can use. Handles graceful shutdown.
|
||||
|
||||
Usage:
|
||||
# Get singleton instance
|
||||
pool = WebSocketConnectionPool()
|
||||
|
||||
# Connect (idempotent - safe to call multiple times)
|
||||
await pool.connect()
|
||||
|
||||
# Send events (thread-safe, multiple sessions can call concurrently)
|
||||
await pool.send_event("query", session_id, {...})
|
||||
|
||||
# Shutdown handled automatically on SIGTERM/SIGINT
|
||||
"""
|
||||
|
||||
_instance: Optional['WebSocketConnectionPool'] = None
|
||||
|
||||
def __new__(cls):
|
||||
"""Ensure only one instance exists (singleton pattern)."""
|
||||
if cls._instance is None:
|
||||
cls._instance = super().__new__(cls)
|
||||
cls._instance._initialized = False
|
||||
return cls._instance
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize the connection pool (only once)."""
|
||||
if getattr(self, '_initialized', False):
|
||||
return
|
||||
|
||||
self.websocket: Optional[websockets.WebSocketClientProtocol] = None
|
||||
self.server_url: str = "ws://localhost:8000/ws"
|
||||
self.connected: bool = False
|
||||
# Store reference to loop for signal handlers
|
||||
# Agent code should never close event loops when using persistent connections
|
||||
self.loop: Optional[asyncio.AbstractEventLoop] = None
|
||||
# Locks are created lazily when event loop exists
|
||||
self._send_lock: Optional[asyncio.Lock] = None
|
||||
self._connect_lock: Optional[asyncio.Lock] = None
|
||||
self._locks_loop: Optional[asyncio.AbstractEventLoop] = None # Track which loop created locks
|
||||
self._init_lock = threading.Lock() # Thread-safe lock initialization
|
||||
self._shutdown_in_progress = False
|
||||
self._initialized = True
|
||||
|
||||
# Register shutdown handlers for graceful cleanup
|
||||
# These ensure WebSocket is closed properly on exit
|
||||
signal.signal(signal.SIGTERM, self._signal_handler)
|
||||
signal.signal(signal.SIGINT, self._signal_handler)
|
||||
atexit.register(self._cleanup_sync)
|
||||
|
||||
print("🔌 WebSocket connection pool initialized")
|
||||
|
||||
def _ensure_locks(self):
|
||||
"""
|
||||
Lazy initialization of asyncio locks with thread safety and loop tracking.
|
||||
|
||||
Locks must be created when an event loop exists, not at import time.
|
||||
If the event loop changes between runs, locks must be recreated because
|
||||
asyncio.Lock objects are bound to the loop that created them.
|
||||
|
||||
This is called before any async operation that needs locks.
|
||||
Uses a threading.Lock to prevent race conditions during initialization.
|
||||
"""
|
||||
with self._init_lock: # Thread-safe initialization
|
||||
try:
|
||||
current_loop = asyncio.get_event_loop()
|
||||
except RuntimeError:
|
||||
# No event loop in current thread
|
||||
return
|
||||
|
||||
# Recreate locks if:
|
||||
# 1. Locks don't exist yet, OR
|
||||
# 2. Event loop has changed (locks are bound to the loop that created them)
|
||||
if self._locks_loop is not current_loop or self._send_lock is None:
|
||||
self._send_lock = asyncio.Lock()
|
||||
self._connect_lock = asyncio.Lock()
|
||||
self._locks_loop = current_loop
|
||||
|
||||
async def connect(self, server_url: str = "ws://localhost:8000/ws") -> bool:
|
||||
"""
|
||||
Connect to WebSocket server.
|
||||
|
||||
This is idempotent - safe to call multiple times. If already connected,
|
||||
does nothing. If connection failed previously, will retry.
|
||||
|
||||
Args:
|
||||
server_url: WebSocket server URL (default: ws://localhost:8000/ws)
|
||||
|
||||
Returns:
|
||||
bool: True if connected successfully, False otherwise
|
||||
"""
|
||||
# Ensure locks exist (lazy initialization)
|
||||
self._ensure_locks()
|
||||
|
||||
async with self._connect_lock:
|
||||
# Always update loop reference to current loop (even if already connected)
|
||||
# This ensures signal handlers and cleanup use the correct loop
|
||||
self.loop = asyncio.get_event_loop()
|
||||
|
||||
# Already connected - nothing to do
|
||||
if self.connected and self.websocket:
|
||||
return True
|
||||
|
||||
try:
|
||||
self.server_url = server_url
|
||||
|
||||
# Establish persistent WebSocket connection
|
||||
# No ping/pong needed since connection stays open indefinitely
|
||||
self.websocket = await websockets.connect(
|
||||
server_url,
|
||||
ping_interval=None, # Disable ping/pong (not needed for persistent connection)
|
||||
max_size=10 * 1024 * 1024, # 10MB max message size for large tool results
|
||||
open_timeout=10, # 10s timeout for initial connection
|
||||
close_timeout=5 # 5s timeout for close handshake
|
||||
)
|
||||
|
||||
self.connected = True
|
||||
|
||||
print(f"✅ Connected to logging server (persistent): {server_url}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
print(f"⚠️ Failed to connect to logging server: {e}")
|
||||
self.connected = False
|
||||
self.websocket = None
|
||||
return False
|
||||
|
||||
async def send_event(
|
||||
self,
|
||||
event_type: str,
|
||||
session_id: str,
|
||||
data: Dict[str, Any],
|
||||
retry: bool = True
|
||||
) -> bool:
|
||||
"""
|
||||
Send event to logging server (thread-safe).
|
||||
|
||||
Multiple agent runs can call this concurrently. The send lock ensures
|
||||
only one message is sent at a time (WebSocket protocol requirement).
|
||||
|
||||
Args:
|
||||
event_type: Type of event (query, api_call, response, tool_call, tool_result, error, complete)
|
||||
session_id: Unique session identifier
|
||||
data: Event-specific data dictionary
|
||||
retry: Whether to retry connection if disconnected (default: True)
|
||||
|
||||
Returns:
|
||||
bool: True if sent successfully, False otherwise
|
||||
"""
|
||||
# Try to connect if not connected (or reconnect if disconnected)
|
||||
if not self.connected or not self.websocket:
|
||||
if retry:
|
||||
await self.connect()
|
||||
if not self.connected:
|
||||
return False # Give up if connection fails
|
||||
|
||||
# Ensure locks exist (lazy initialization)
|
||||
self._ensure_locks()
|
||||
|
||||
# Lock to prevent concurrent sends (WebSocket requires sequential sends)
|
||||
async with self._send_lock:
|
||||
try:
|
||||
# Create standardized message format
|
||||
message = {
|
||||
"session_id": session_id,
|
||||
"event_type": event_type,
|
||||
"data": data,
|
||||
"timestamp": datetime.now().isoformat()
|
||||
}
|
||||
|
||||
# Send message as JSON
|
||||
await self.websocket.send(json.dumps(message))
|
||||
|
||||
# Wait for server acknowledgment (with timeout)
|
||||
# This confirms the server received and processed the event
|
||||
try:
|
||||
response = await asyncio.wait_for(
|
||||
self.websocket.recv(),
|
||||
timeout=2.0 # Increased to 2s for busy servers
|
||||
)
|
||||
# Successfully received acknowledgment
|
||||
return True
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
# No response within timeout - that's OK, message likely sent
|
||||
# Server might be busy processing
|
||||
return True
|
||||
|
||||
except websockets.exceptions.ConnectionClosed:
|
||||
print(f"⚠️ WebSocket connection closed unexpectedly")
|
||||
self.connected = False
|
||||
|
||||
# Try to reconnect and resend (one retry)
|
||||
if retry:
|
||||
print("🔄 Attempting to reconnect...")
|
||||
if await self.connect():
|
||||
# Recursively call with retry=False to avoid infinite loop
|
||||
return await self.send_event(event_type, session_id, data, retry=False)
|
||||
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
print(f"⚠️ Error sending event: {e}")
|
||||
self.connected = False
|
||||
return False
|
||||
|
||||
async def disconnect(self):
|
||||
"""
|
||||
Gracefully close the WebSocket connection.
|
||||
|
||||
Called on shutdown (SIGTERM/SIGINT/exit). Ensures proper cleanup.
|
||||
"""
|
||||
if self._shutdown_in_progress:
|
||||
return # Already shutting down
|
||||
|
||||
self._shutdown_in_progress = True
|
||||
|
||||
if self.websocket and self.connected:
|
||||
try:
|
||||
await self.websocket.close()
|
||||
self.connected = False
|
||||
print("✅ WebSocket connection pool closed gracefully")
|
||||
except Exception as e:
|
||||
print(f"⚠️ Error closing WebSocket: {e}")
|
||||
|
||||
self._shutdown_in_progress = False
|
||||
|
||||
def _signal_handler(self, signum, frame):
|
||||
"""
|
||||
Handle SIGTERM/SIGINT signals for graceful shutdown.
|
||||
|
||||
When user presses Ctrl+C or system sends SIGTERM, this ensures
|
||||
the WebSocket is closed properly before exit.
|
||||
"""
|
||||
print(f"\n🛑 Received signal {signum}, closing WebSocket connection pool...")
|
||||
|
||||
# Check if we have a valid loop and are connected
|
||||
if self.loop and not self.loop.is_closed() and self.connected and not self._shutdown_in_progress:
|
||||
try:
|
||||
# If loop is not running, we can wait for disconnect
|
||||
if not self.loop.is_running():
|
||||
self.loop.run_until_complete(self.disconnect())
|
||||
else:
|
||||
# Loop is running, can't wait for task - just mark disconnected
|
||||
# The disconnect task would be cancelled when we exit anyway
|
||||
self.connected = False
|
||||
print("⚠️ Loop is running, marking disconnected without waiting")
|
||||
except Exception as e:
|
||||
print(f"⚠️ Error during signal handler cleanup: {e}")
|
||||
|
||||
# Exit gracefully
|
||||
sys.exit(0)
|
||||
|
||||
def _cleanup_sync(self):
|
||||
"""
|
||||
Cleanup at exit (atexit handler).
|
||||
|
||||
This is a fallback in case signal handlers don't fire.
|
||||
Called when Python interpreter shuts down normally.
|
||||
"""
|
||||
if self.loop and not self.loop.is_closed() and self.connected and not self._shutdown_in_progress:
|
||||
try:
|
||||
# Try to run disconnect synchronously
|
||||
self.loop.run_until_complete(self.disconnect())
|
||||
except Exception:
|
||||
# Ignore errors during exit cleanup
|
||||
pass
|
||||
|
||||
def is_connected(self) -> bool:
|
||||
"""Check if currently connected to server."""
|
||||
return self.connected and self.websocket is not None
|
||||
|
||||
def get_stats(self) -> Dict[str, Any]:
|
||||
"""Get connection statistics for debugging."""
|
||||
return {
|
||||
"connected": self.connected,
|
||||
"server_url": self.server_url,
|
||||
"shutdown_in_progress": self._shutdown_in_progress,
|
||||
"has_websocket": self.websocket is not None,
|
||||
"has_loop": self.loop is not None
|
||||
}
|
||||
|
||||
|
||||
# Global singleton instance
|
||||
# Import this in other modules: from websocket_connection_pool import ws_pool
|
||||
ws_pool = WebSocketConnectionPool()
|
||||
|
||||
|
||||
# Convenience functions for direct usage
|
||||
async def connect(server_url: str = "ws://localhost:8000/ws") -> bool:
|
||||
"""Connect to logging server (convenience function)."""
|
||||
return await ws_pool.connect(server_url)
|
||||
|
||||
|
||||
async def send_event(event_type: str, session_id: str, data: Dict[str, Any]) -> bool:
|
||||
"""Send event to logging server (convenience function)."""
|
||||
return await ws_pool.send_event(event_type, session_id, data)
|
||||
|
||||
|
||||
async def disconnect():
|
||||
"""Disconnect from logging server (convenience function)."""
|
||||
await ws_pool.disconnect()
|
||||
|
||||
|
||||
def is_connected() -> bool:
|
||||
"""Check if connected to logging server (convenience function)."""
|
||||
return ws_pool.is_connected()
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# SYNCHRONOUS API FOR AGENT LAYER
|
||||
# ============================================================================
|
||||
# These functions provide a clean abstraction that hides event loop management
|
||||
# from the agent layer. Agent code should ONLY use these functions.
|
||||
|
||||
def connect_sync(server_url: str = "ws://localhost:8000/ws") -> bool:
|
||||
"""
|
||||
Synchronous connect - handles event loop internally.
|
||||
|
||||
Agent code should use this instead of directly managing event loops.
|
||||
This ensures the connection pool maintains full control over its lifecycle.
|
||||
"""
|
||||
try:
|
||||
loop = asyncio.get_event_loop()
|
||||
if loop.is_closed():
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
except RuntimeError:
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
|
||||
return loop.run_until_complete(ws_pool.connect(server_url))
|
||||
|
||||
|
||||
def send_event_sync(event_type: str, session_id: str, data: Dict[str, Any]) -> bool:
|
||||
"""
|
||||
Synchronous send event - handles event loop internally.
|
||||
|
||||
Agent code should use this instead of managing event loops.
|
||||
This ensures the connection pool maintains full control over its lifecycle.
|
||||
"""
|
||||
try:
|
||||
loop = asyncio.get_event_loop()
|
||||
if loop.is_closed():
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
except RuntimeError:
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
|
||||
return loop.run_until_complete(ws_pool.send_event(event_type, session_id, data))
|
||||
|
||||
387
api_endpoint/websocket_logger.py
Normal file
387
api_endpoint/websocket_logger.py
Normal file
|
|
@ -0,0 +1,387 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
WebSocket Logger Client
|
||||
|
||||
Simple client for sending agent events to the logging server via WebSocket.
|
||||
Used by the agent to log events in real-time during execution.
|
||||
"""
|
||||
|
||||
import json
|
||||
import asyncio
|
||||
from typing import Dict, Any, Optional
|
||||
from datetime import datetime
|
||||
import websockets
|
||||
|
||||
|
||||
class WebSocketLogger:
|
||||
"""
|
||||
Client for logging agent events via WebSocket.
|
||||
|
||||
Usage:
|
||||
logger = WebSocketLogger("unique-session-id")
|
||||
await logger.connect()
|
||||
await logger.log_query("What is Python?", model="gpt-4")
|
||||
await logger.log_api_call(call_number=1)
|
||||
await logger.log_response(call_number=1, content="Python is...")
|
||||
await logger.disconnect()
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
session_id: str,
|
||||
server_url: str = "ws://localhost:8000/ws",
|
||||
enabled: bool = True
|
||||
):
|
||||
"""
|
||||
Initialize WebSocket logger.
|
||||
|
||||
Args:
|
||||
session_id: Unique identifier for this agent session
|
||||
server_url: WebSocket server URL (default: ws://localhost:8000/ws)
|
||||
enabled: Whether logging is enabled (default: True)
|
||||
"""
|
||||
self.session_id = session_id
|
||||
self.server_url = server_url
|
||||
self.enabled = enabled
|
||||
self.websocket: Optional[websockets.WebSocketClientProtocol] = None
|
||||
self.connected = False
|
||||
self.reconnect_count = 0 # Track reconnections for debugging
|
||||
|
||||
async def connect(self):
|
||||
"""
|
||||
Connect to the WebSocket logging server.
|
||||
|
||||
Establishes WebSocket connection and sends initial session_start event.
|
||||
If connection fails, gracefully disables logging (agent continues normally).
|
||||
"""
|
||||
if not self.enabled:
|
||||
return
|
||||
|
||||
try:
|
||||
# Establish WebSocket connection to the server
|
||||
# Use VERY LONG ping intervals to avoid timeout during long tool execution
|
||||
# The event loop is blocked during tool execution, so we can't process pings
|
||||
# Setting to very large values (1 hour) effectively disables it
|
||||
self.websocket = await websockets.connect(
|
||||
self.server_url,
|
||||
ping_interval=3600, # 1 hour - effectively disabled (event loop blocked anyway)
|
||||
ping_timeout=3600, # 1 hour timeout for pong response
|
||||
close_timeout=10, # Timeout for close handshake
|
||||
max_size=10 * 1024 * 1024, # 10MB max message size (for large raw_results)
|
||||
open_timeout=10 # Timeout for initial connection
|
||||
)
|
||||
self.connected = True
|
||||
print(f"✅ Connected to logging server (ping/pong: 3600s intervals): {self.server_url}")
|
||||
|
||||
# Send initial session_start event
|
||||
# This tells the server to create a new SessionLogger for this session
|
||||
await self._send_event("session_start", {
|
||||
"session_id": self.session_id,
|
||||
"start_time": datetime.now().isoformat()
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
# Connection failed - disable logging but don't crash the agent
|
||||
print(f"⚠️ Failed to connect to logging server: {e}")
|
||||
print(f" Logging will be disabled for this session.")
|
||||
self.enabled = False
|
||||
self.connected = False
|
||||
|
||||
async def disconnect(self):
|
||||
"""Disconnect from the WebSocket server."""
|
||||
if self.websocket and self.connected:
|
||||
try:
|
||||
await self.websocket.close()
|
||||
self.connected = False
|
||||
print(f"✅ Disconnected from logging server")
|
||||
except Exception as e:
|
||||
print(f"⚠️ Error disconnecting: {e}")
|
||||
|
||||
async def _send_event(self, event_type: str, data: Dict[str, Any]):
|
||||
"""
|
||||
Send an event to the logging server.
|
||||
|
||||
This is the core method that sends all events via WebSocket.
|
||||
Creates a standardized message format and handles acknowledgments.
|
||||
|
||||
Args:
|
||||
event_type: Type of event (query, api_call, response, tool_call, tool_result, error, complete)
|
||||
data: Event data dictionary containing event-specific information
|
||||
"""
|
||||
# Safety check: Don't send if logging is disabled
|
||||
if not self.enabled:
|
||||
return
|
||||
|
||||
# Auto-reconnect if connection was lost
|
||||
if not self.connected or not self.websocket:
|
||||
try:
|
||||
self.reconnect_count += 1
|
||||
print(f"🔄 Reconnecting to logging server (attempt #{self.reconnect_count})...")
|
||||
await self.connect()
|
||||
print(f"✅ Reconnected successfully!")
|
||||
except Exception as e:
|
||||
print(f"⚠️ Failed to reconnect: {e}")
|
||||
self.enabled = False # Disable logging after failed reconnect
|
||||
return
|
||||
|
||||
try:
|
||||
# Create standardized message structure
|
||||
# All events follow this format for consistent server-side handling
|
||||
message = {
|
||||
"session_id": self.session_id, # Links event to specific agent session
|
||||
"event_type": event_type, # Identifies what kind of event this is
|
||||
"data": data # Event-specific payload
|
||||
}
|
||||
|
||||
# Send message as JSON string over WebSocket
|
||||
await self.websocket.send(json.dumps(message))
|
||||
|
||||
# Wait for server acknowledgment (with 1 second timeout)
|
||||
# This ensures the server received and processed the event
|
||||
try:
|
||||
response = await asyncio.wait_for(
|
||||
self.websocket.recv(),
|
||||
timeout=1.0
|
||||
)
|
||||
# Server sends back: {"status": "logged", "session_id": "...", "event_type": "..."}
|
||||
# We don't need to process it, just confirms receipt
|
||||
except asyncio.TimeoutError:
|
||||
# No response within 1 second - that's okay, continue anyway
|
||||
# Server might be busy or network slow, but event was likely sent
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
# Log error but don't crash - graceful degradation
|
||||
# Agent should continue working even if logging fails
|
||||
error_str = str(e)
|
||||
|
||||
# Check if connection was closed (error 1011 = keepalive ping timeout)
|
||||
if "1011" in error_str or "closed" in error_str.lower():
|
||||
print(f"⚠️ WebSocket connection closed: {error_str}")
|
||||
self.connected = False # Mark as disconnected
|
||||
# Don't try to send more events - connection is dead
|
||||
else:
|
||||
print(f"⚠️ Error sending event to logging server: {e}")
|
||||
# Don't disable entirely or try to reconnect - just continue with logging disabled
|
||||
|
||||
# Convenience methods for specific event types
|
||||
|
||||
async def log_query(
|
||||
self,
|
||||
query: str,
|
||||
model: str = None,
|
||||
toolsets: list = None
|
||||
):
|
||||
"""
|
||||
Log a user query (the question/task given to the agent).
|
||||
|
||||
This is typically the first event in a session after connection.
|
||||
Captures what the user asked and which model/tools will be used.
|
||||
"""
|
||||
await self._send_event("query", {
|
||||
"query": query, # The user's question/instruction
|
||||
"model": model, # Which AI model is being used
|
||||
"toolsets": toolsets # Which tool categories are enabled
|
||||
})
|
||||
|
||||
async def log_api_call(
|
||||
self,
|
||||
call_number: int,
|
||||
message_count: int = None,
|
||||
has_tools: bool = None
|
||||
):
|
||||
"""
|
||||
Log an API call to the AI model.
|
||||
|
||||
Called right before sending a request to the model (OpenAI/Anthropic/etc).
|
||||
Helps track how many API calls are being made and conversation length.
|
||||
"""
|
||||
await self._send_event("api_call", {
|
||||
"call_number": call_number, # Sequential number (1, 2, 3...)
|
||||
"message_count": message_count, # How many messages in conversation so far
|
||||
"has_tools": has_tools # Whether tools are available to the model
|
||||
})
|
||||
|
||||
async def log_response(
|
||||
self,
|
||||
call_number: int,
|
||||
content: str = None,
|
||||
has_tool_calls: bool = False,
|
||||
tool_call_count: int = 0,
|
||||
duration: float = None
|
||||
):
|
||||
"""
|
||||
Log an assistant response from the AI model.
|
||||
|
||||
Called after receiving a response from the API.
|
||||
Captures what the model said and whether it wants to use tools.
|
||||
"""
|
||||
await self._send_event("response", {
|
||||
"call_number": call_number, # Which API call this response is from
|
||||
"content": content, # What the model said (text response)
|
||||
"has_tool_calls": has_tool_calls, # Did model request tool execution?
|
||||
"tool_call_count": tool_call_count, # How many tools does it want to call?
|
||||
"duration": duration # How long the API call took (seconds)
|
||||
})
|
||||
|
||||
async def log_tool_call(
|
||||
self,
|
||||
call_number: int,
|
||||
tool_index: int,
|
||||
tool_name: str,
|
||||
parameters: Dict[str, Any],
|
||||
tool_call_id: str = None
|
||||
):
|
||||
"""
|
||||
Log a tool call (before executing the tool).
|
||||
|
||||
Captures which tool is being called and with what parameters.
|
||||
This happens BEFORE the tool runs, so no results yet.
|
||||
"""
|
||||
await self._send_event("tool_call", {
|
||||
"call_number": call_number, # Which API call requested this tool
|
||||
"tool_index": tool_index, # Which tool in the sequence (if multiple)
|
||||
"tool_name": tool_name, # Name of tool (e.g., "web_search", "web_extract")
|
||||
"parameters": parameters, # Arguments passed to the tool (e.g., {"query": "Python", "limit": 5})
|
||||
"tool_call_id": tool_call_id # Unique ID to link call with result
|
||||
})
|
||||
|
||||
async def log_tool_result(
|
||||
self,
|
||||
call_number: int,
|
||||
tool_index: int,
|
||||
tool_name: str,
|
||||
result: str = None,
|
||||
error: str = None,
|
||||
duration: float = None,
|
||||
tool_call_id: str = None,
|
||||
raw_result: str = None # NEW: Full untruncated result for verification
|
||||
):
|
||||
"""
|
||||
Log a tool result (output from tool execution).
|
||||
|
||||
Captures both a truncated preview (for UI display) and the full raw result
|
||||
(for verification and debugging). This is especially important for web tools
|
||||
where you want to see what was scraped vs what the LLM processed.
|
||||
|
||||
Args:
|
||||
call_number: Which API call this tool was part of
|
||||
tool_index: Which tool in the sequence (1st, 2nd, etc.)
|
||||
tool_name: Name of the tool that was executed
|
||||
result: Tool output (will be truncated to 1000 chars for preview)
|
||||
error: Error message if tool failed
|
||||
duration: How long the tool took to execute (seconds)
|
||||
tool_call_id: Unique ID linking this result to the tool call
|
||||
raw_result: NEW - Full untruncated result for verification/debugging
|
||||
"""
|
||||
await self._send_event("tool_result", {
|
||||
"call_number": call_number,
|
||||
"tool_index": tool_index,
|
||||
"tool_name": tool_name,
|
||||
"result": result[:1000] if result else None, # Truncated preview (1000 chars max)
|
||||
"raw_result": raw_result, # NEW: Full result - can be 100KB+ for web scraping
|
||||
"error": error,
|
||||
"duration": duration,
|
||||
"tool_call_id": tool_call_id
|
||||
})
|
||||
|
||||
async def log_error(
|
||||
self,
|
||||
error_message: str,
|
||||
call_number: int = None
|
||||
):
|
||||
"""
|
||||
Log an error that occurred during agent execution.
|
||||
|
||||
Captures exceptions, API failures, or other issues.
|
||||
"""
|
||||
await self._send_event("error", {
|
||||
"error_message": error_message, # Description of what went wrong
|
||||
"call_number": call_number # Which API call caused the error (if applicable)
|
||||
})
|
||||
|
||||
async def log_complete(
|
||||
self,
|
||||
final_response: str = None,
|
||||
total_calls: int = None,
|
||||
completed: bool = True
|
||||
):
|
||||
"""
|
||||
Log session completion (final event before disconnecting).
|
||||
|
||||
Marks the end of the agent's execution and provides summary info.
|
||||
"""
|
||||
await self._send_event("complete", {
|
||||
"final_response": final_response[:500] if final_response else None, # Truncated summary of final answer
|
||||
"total_calls": total_calls, # How many API calls were made total
|
||||
"completed": completed # Did it complete successfully? (true/false)
|
||||
})
|
||||
|
||||
|
||||
# Synchronous wrapper for convenience
|
||||
class SyncWebSocketLogger:
|
||||
"""
|
||||
Synchronous wrapper around WebSocketLogger.
|
||||
|
||||
For use in synchronous code - creates an event loop internally.
|
||||
"""
|
||||
|
||||
def __init__(self, session_id: str, server_url: str = "ws://localhost:8000/ws", enabled: bool = True):
|
||||
self.logger = WebSocketLogger(session_id, server_url, enabled)
|
||||
self.loop = None
|
||||
|
||||
def connect(self):
|
||||
"""Connect to server (synchronous)."""
|
||||
self.loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(self.loop)
|
||||
self.loop.run_until_complete(self.logger.connect())
|
||||
|
||||
def disconnect(self):
|
||||
"""Disconnect from server (synchronous)."""
|
||||
if self.loop:
|
||||
self.loop.run_until_complete(self.logger.disconnect())
|
||||
self.loop.close()
|
||||
|
||||
def _run_async(self, coro):
|
||||
"""
|
||||
Run an async coroutine synchronously.
|
||||
|
||||
Bridge between sync code (agent) and async code (WebSocket).
|
||||
Uses event loop to execute async operations in sync context.
|
||||
"""
|
||||
if self.loop and self.loop.is_running():
|
||||
# Already in event loop, just await
|
||||
asyncio.create_task(coro)
|
||||
else:
|
||||
# Run in current loop
|
||||
if self.loop:
|
||||
self.loop.run_until_complete(coro)
|
||||
|
||||
def log_query(self, query: str, model: str = None, toolsets: list = None):
|
||||
self._run_async(self.logger.log_query(query, model, toolsets))
|
||||
|
||||
def log_api_call(self, call_number: int, message_count: int = None, has_tools: bool = None):
|
||||
self._run_async(self.logger.log_api_call(call_number, message_count, has_tools))
|
||||
|
||||
def log_response(self, call_number: int, content: str = None, has_tool_calls: bool = False,
|
||||
tool_call_count: int = 0, duration: float = None):
|
||||
self._run_async(self.logger.log_response(call_number, content, has_tool_calls,
|
||||
tool_call_count, duration))
|
||||
|
||||
def log_tool_call(self, call_number: int, tool_index: int, tool_name: str,
|
||||
parameters: Dict[str, Any], tool_call_id: str = None):
|
||||
self._run_async(self.logger.log_tool_call(call_number, tool_index, tool_name,
|
||||
parameters, tool_call_id))
|
||||
|
||||
def log_tool_result(self, call_number: int, tool_index: int, tool_name: str,
|
||||
result: str = None, error: str = None, duration: float = None,
|
||||
tool_call_id: str = None, raw_result: str = None):
|
||||
self._run_async(self.logger.log_tool_result(call_number, tool_index, tool_name,
|
||||
result, error, duration, tool_call_id, raw_result))
|
||||
|
||||
def log_error(self, error_message: str, call_number: int = None):
|
||||
self._run_async(self.logger.log_error(error_message, call_number))
|
||||
|
||||
def log_complete(self, final_response: str = None, total_calls: int = None, completed: bool = True):
|
||||
self._run_async(self.logger.log_complete(final_response, total_calls, completed))
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue