Add mini-swe-agent runner and trajectory compressor

- Introduced mini_swe_runner.py for executing tasks using mini-swe-agent environments (local, Docker, Modal) and outputting trajectories in Hermes format.
- Implemented trajectory_compressor.py to post-process agent trajectories, compressing them within a target token budget while preserving essential content.
- Added trajectory_compression.yaml configuration file for customizable compression settings.
- Created sample_and_compress.py script to download, sample, and compress trajectories from HuggingFace datasets.
- Enhanced logging and error handling across new modules for improved usability and debugging.
2026-04-28 01:21:43 +00:00 · 2026-01-23 00:52:46 +00:00 · 2026-01-23 00:52:46 +00:00 · 47555602d7
commit 47555602d7
parent 6eb76c7c1a
4 changed files with 2455 additions and 0 deletions
--- a/configs/trajectory_compression.yaml
+++ b/configs/trajectory_compression.yaml
@@ -0,0 +1,97 @@
+# Trajectory Compression Configuration
+# 
+# Post-processes completed agent trajectories to fit within a target token budget.
+# Compression preserves head/tail turns and summarizes middle content only as needed.
+
+# Tokenizer settings for accurate token counting
+tokenizer:
+  # HuggingFace tokenizer name
+  name: "moonshotai/Kimi-K2-Thinking"
+  
+  # Trust remote code (required for some tokenizers)
+  trust_remote_code: true
+
+# Compression targets and behavior
+compression:
+  # Target maximum tokens for compressed trajectory
+  target_max_tokens: 29000
+  
+  # Target size for summary (in tokens)
+  # This is factored into calculations when determining what to compress
+  summary_target_tokens: 750
+
+# Protected turns that should NEVER be compressed
+protected_turns:
+  # Always protect the first system message (tool definitions)
+  first_system: true
+  
+  # Always protect the first human message (original request)
+  first_human: true
+  
+  # Always protect the first gpt message (initial response/tool_call)
+  first_gpt: true
+  
+  # Always protect the first tool response (result of first action)
+  first_tool: true
+  
+  # Always protect the last N turns (N presumably counts individual gpt/tool turns,
+  # so 4 = the last 2 gpt+tool pairs); preserves the model's final actions and conclusions
+  last_n_turns: 4
+
+# LLM settings for generating summaries (OpenRouter only)
+summarization:
+  # Model to use for summarization (should be fast and cheap)
+  # Using OpenRouter model path format
+  model: "google/gemini-3-flash-preview"
+  
+  # OpenRouter API settings
+  base_url: "https://openrouter.ai/api/v1"
+  
+  # Environment variable containing OpenRouter API key
+  api_key_env: "OPENROUTER_API_KEY"
+  
+  # Temperature for summarization (lower = more deterministic)
+  temperature: 0.3
+  
+  # Max retries for API failures
+  max_retries: 3
+  
+  # Delay between retries (seconds)
+  retry_delay: 2
+
+# Output settings
+output:
+  # Add notice to system message about potential summarization
+  add_summary_notice: true
+  
+  # Text to append to system message
+  summary_notice_text: "\n\nSome of the conversation may be summarized to preserve context."
+  
+  # Output directory suffix (appended to input directory name)
+  output_suffix: "_compressed"
+
+# Processing settings
+processing:
+  # Number of parallel workers for batch processing
+  num_workers: 4
+  
+  # Maximum concurrent API calls for summarization (async parallelism)
+  max_concurrent_requests: 50
+  
+  # Skip trajectories that are already under target length
+  skip_under_target: true
+  
+  # If true, save trajectories even if compression can't get under target
+  # (will compress as much as possible)
+  save_over_limit: true
+
+# Metrics collection and reporting settings
+metrics:
+  # Log detailed compression statistics
+  enabled: true
+  
+  # Save per-trajectory metrics in output
+  per_trajectory: false
+  
+  # Metrics file name (saved in output directory)
+  output_file: "compression_metrics.json"