mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-27 01:11:40 +00:00
feat: add self-evolution plugin — agent self-optimization system
Add a comprehensive self-evolution system that enables Hermes Agent to continuously improve through automated analysis and optimization: Core components: - reflection_engine: Nightly session analysis (1:00 AM) - evolution_proposer: Generate improvement proposals from insights - quality_scorer: Multi-dimensional session quality evaluation - strategy_injector: Inject learned strategies into new sessions - strategy_compressor: Strategy optimization and deduplication - git_analyzer: Code change pattern analysis - rule_engine: Pattern-based rule generation - feishu_notifier: Feishu card notifications for evolution events Storage: - db.py: SQLite telemetry storage - strategy_store: Persistent strategy storage - models.py: Data models Plugin integration: - plugin.yaml, hooks.py, __init__.py for plugin system - cron_jobs.py for scheduled tasks Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
e5d41f05d4
commit
3cd384dc43
23 changed files with 6173 additions and 0 deletions
751
self_evolution/reflection_engine.py
Normal file
751
self_evolution/reflection_engine.py
Normal file
|
|
@ -0,0 +1,751 @@
|
|||
"""
|
||||
Self Evolution Plugin — Dream Engine (Reflection Engine)
|
||||
=========================================================
|
||||
|
||||
Runs nightly at 1:00 to analyze the previous day's sessions.
|
||||
|
||||
Design reference: Claude Code plugins/hookify/agents/conversation-analyzer.md
|
||||
- Analyzes conversations in reverse chronological order
|
||||
- Detects: corrections, frustrations, repeated issues, reversions
|
||||
- Extracts tool usage patterns, converts to actionable rules
|
||||
- Categorizes by severity
|
||||
|
||||
We extend this pattern with:
|
||||
- Full automated analysis (not just on user request)
|
||||
- Error analysis (tool failures, retries, API errors)
|
||||
- Time waste analysis (slow tools, repeated ops, inefficient sessions)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from self_evolution import db
|
||||
from self_evolution.model_config import resolve_config, get_active_text_config, switch_to_fallback
|
||||
from self_evolution.git_analyzer import analyze_code_changes
|
||||
from self_evolution.models import (
|
||||
ErrorAnalysis, ToolFailure, RetryPattern,
|
||||
WasteAnalysis, ToolDuration, RepeatedOperation,
|
||||
CodeChangeAnalysis, CommitInfo,
|
||||
ReflectionReport,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ── Backward-compatible aliases ────────────────────────────────────────────
# These are used by cron_jobs.py and other callers.
# The canonical implementations live in self_evolution.model_config; the
# leading-underscore names are kept so existing imports keep working.
_resolve_runtime_config = resolve_config
_get_active_text_config = get_active_text_config
_switch_to_fallback = switch_to_fallback
|
||||
|
||||
class DreamEngine:
    """Nightly dream consolidation engine.

    Analyzes the previous day's sessions to find:
    1. Error patterns (tool failures, retries, incomplete tasks)
    2. Time waste patterns (slow tools, repeated operations, inefficient flows)
    3. Success patterns (what worked well)
    4. Generates actionable evolution proposals
    """

    def __init__(self, config: Optional[dict] = None):
        """Create the engine.

        Args:
            config: Runtime model configuration. When None, the active
                configuration is resolved via ``_resolve_runtime_config``.
        """
        # Compare with `is None` rather than truthiness so an explicitly
        # passed empty dict is honored instead of silently triggering
        # config resolution (the old `config or ...` form lost it).
        self.config = config if config is not None else _resolve_runtime_config()
        # Lazily created model client handle (not populated in this module).
        self._model_client = None
        # Last prompt passed to _call_model; read by _call_chat_completions.
        self._current_prompt = ""
||||
def run(self, hours: int = 24, max_runtime_seconds: int = 0) -> Optional[ReflectionReport]:
    """Main dream consolidation flow.

    Args:
        hours: Analyze data from the last N hours.
        max_runtime_seconds: Hard timeout in seconds. 0 = no limit.
            If exceeded, stops at the next step boundary and returns None.

    Returns:
        The stored ReflectionReport, or None when there is nothing to
        analyze, the deadline is exceeded, the model returns nothing,
        or any step raises.
    """
    logger.info("Dream engine starting — analyzing last %d hours", hours)

    # 0 doubles as the "no deadline" sentinel; checked before each costly step.
    deadline = time.time() + max_runtime_seconds if max_runtime_seconds > 0 else 0

    now = time.time()
    cutoff = now - (hours * 3600)

    try:
        # 1. Load session data (newest first; signals need no ordering)
        scores = db.fetch_all(
            "session_scores",
            where="created_at >= ?",
            params=(cutoff,),
            order_by="created_at DESC",
        )
        tool_invocations = db.fetch_all(
            "tool_invocations",
            where="created_at >= ?",
            params=(cutoff,),
            order_by="created_at DESC",
        )
        signals = db.fetch_all(
            "outcome_signals",
            where="created_at >= ?",
            params=(cutoff,),
        )

        if not scores:
            logger.info("No sessions to analyze")
            return None

        # 2. Error analysis
        if deadline and time.time() > deadline:
            logger.warning("Dream engine timed out before error analysis")
            return None
        error_analysis = self._analyze_errors(scores, tool_invocations, signals)
        logger.info("Error analysis: %s", error_analysis.summary())

        # 3. Time waste analysis
        if deadline and time.time() > deadline:
            logger.warning("Dream engine timed out before waste analysis")
            return None
        waste_analysis = self._analyze_time_waste(scores, tool_invocations)
        logger.info("Waste analysis: %s", waste_analysis.summary())

        # 3.5. Code change analysis over the same time window
        if deadline and time.time() > deadline:
            logger.warning("Dream engine timed out before code analysis")
            return None
        code_analysis = analyze_code_changes(hours=hours)
        logger.info("Code change analysis: %d commits found", code_analysis.total_commits)

        # 4. Compute average score (scores is non-empty here; guard kept for safety)
        avg_score = (
            sum(s.get("composite_score", 0) for s in scores) / len(scores)
            if scores else 0
        )

        # 5. Build reflection prompt
        if deadline and time.time() > deadline:
            logger.warning("Dream engine timed out before model call")
            return None
        prompt = self._build_reflection_prompt(
            scores, tool_invocations, signals,
            error_analysis, waste_analysis, avg_score,
            code_analysis=code_analysis,
        )

        # 6. Call model for deep reflection (slowest step; no deadline
        # check afterwards — the timeout only guards step boundaries)
        reflection_text = self._call_model(prompt)
        if not reflection_text:
            logger.warning("Model returned empty reflection")
            return None

        # 7. Parse reflection report
        report = self._parse_reflection(
            reflection_text=reflection_text,
            period_start=cutoff,
            period_end=now,
            sessions_analyzed=len(scores),
            avg_score=avg_score,
            error_analysis=error_analysis,
            waste_analysis=waste_analysis,
            code_analysis=code_analysis,
        )

        # 8. Store report
        report_id = db.insert("reflection_reports", report.to_db_row())
        logger.info("Reflection report saved: id=%d, avg_score=%.3f", report_id, avg_score)

        # 9. Generate evolution proposals (lazy import — presumably to
        # avoid an import cycle at module load; TODO confirm)
        from self_evolution.evolution_proposer import generate_proposals
        proposals = generate_proposals(report, report_id)
        for p in proposals:
            db.insert("evolution_proposals", p.to_db_row())
        logger.info("Generated %d evolution proposals", len(proposals))

        # 10. Compress existing strategies — best-effort: failure only logs
        try:
            from self_evolution.strategy_compressor import compress_strategies
            from self_evolution.strategy_store import StrategyStore
            store = StrategyStore()
            data = store.load()
            rules = data.get("rules", [])
            compressed = compress_strategies(rules)
            if len(compressed) < len(rules):
                data["rules"] = compressed
                store.save(data)
                logger.info("Strategies compressed: %d → %d", len(rules), len(compressed))
        except Exception as exc:
            logger.warning("Strategy compression failed: %s", exc)

        # 11. Cleanup old data
        db.cleanup(days=30)

        return report

    except Exception as exc:
        # Nightly-job boundary: never propagate; log with traceback instead.
        logger.exception("Dream engine failed: %s", exc)
        return None
||||
# ── Error Analysis ────────────────────────────────────────────────────
|
||||
|
||||
def _analyze_errors(
    self,
    scores: List[dict],
    invocations: List[dict],
    signals: List[dict],
) -> ErrorAnalysis:
    """Analyze all errors in the period.

    Inspired by Claude Code conversation-analyzer's signal detection.

    Args:
        scores: Rows from the ``session_scores`` table.
        invocations: Rows from the ``tool_invocations`` table.
        signals: Rows from the ``outcome_signals`` table.

    Returns:
        ErrorAnalysis aggregating tool failures, retry patterns,
        incomplete sessions, user corrections and API errors.
    """
    # Tool failures, grouped by (tool, error_type)
    failures = {}
    for inv in invocations:
        if not inv.get("success", True):
            tool = inv.get("tool_name", "unknown")
            error_type = inv.get("error_type", "unknown")
            key = f"{tool}:{error_type}"
            if key not in failures:
                failures[key] = ToolFailure(
                    tool_name=tool,
                    error_type=error_type,
                    count=0,
                    sessions_affected=[],
                    example_session=inv.get("session_id", ""),
                )
            failures[key].count += 1
            sid = inv.get("session_id", "")
            if sid and sid not in failures[key].sessions_affected:
                failures[key].sessions_affected.append(sid)

    # Retry patterns (same tool called > 2 times in same session)
    retries = self._detect_retry_patterns(invocations)

    # Incomplete sessions
    incomplete = [
        s.get("session_id", "") for s in scores
        if s.get("completion_rate", 1.0) < 0.5
    ]

    # User corrections from signals
    corrections = [s for s in signals if s.get("signal_type") == "correction"]
    api_errors = [s for s in signals if s.get("signal_type") == "api_error"]

    # API error type distribution. The metadata column is free-form JSON
    # written elsewhere — guard against NULL, malformed JSON, or a
    # non-object payload so one bad row cannot abort the whole analysis.
    api_error_types: Dict[str, int] = {}
    for s in api_errors:
        try:
            meta = json.loads(s.get("metadata") or "{}")
        except (TypeError, ValueError):
            meta = {}
        if not isinstance(meta, dict):
            meta = {}
        etype = meta.get("error_type", "unknown")
        api_error_types[etype] = api_error_types.get(etype, 0) + 1

    return ErrorAnalysis(
        tool_failures=sorted(failures.values(), key=lambda x: x.count, reverse=True),
        retry_patterns=retries,
        incomplete_sessions=incomplete,
        user_corrections=len(corrections),
        correction_examples=[s.get("metadata", "") for s in corrections[:3]],
        api_error_count=len(api_errors),
        api_error_types=api_error_types,
    )
|
||||
def _detect_retry_patterns(self, invocations: List[dict]) -> List[RetryPattern]:
    """Detect tools called > 2 times in same session.

    Returns at most 20 RetryPattern records, ordered by attempt count
    (highest first).
    """
    # Tally calls per (session, tool) pair.
    counts: Dict[str, Dict[str, int]] = {}
    for inv in invocations:
        per_session = counts.setdefault(inv.get("session_id", ""), {})
        name = inv.get("tool_name", "")
        per_session[name] = per_session.get(name, 0) + 1

    # Keep only pairs with more than two attempts.
    found = [
        RetryPattern(
            session_id=sid,
            tool_name=name,
            attempt_count=attempts,
            final_outcome="unknown",
        )
        for sid, per_tool in counts.items()
        for name, attempts in per_tool.items()
        if attempts > 2
    ]
    found.sort(key=lambda p: p.attempt_count, reverse=True)
    return found[:20]
|
||||
# ── Time Waste Analysis ───────────────────────────────────────────────
|
||||
|
||||
def _analyze_time_waste(
    self,
    scores: List[dict],
    invocations: List[dict],
) -> WasteAnalysis:
    """Analyze time waste patterns.

    Covers three angles: slowest tools by average duration, tools called
    repeatedly within one session, and sessions with a very low
    efficiency score.
    """
    # Gather duration samples per tool; rows without a duration are skipped.
    durations_by_tool: Dict[str, List[int]] = {}
    for inv in invocations:
        name = inv.get("tool_name", "")
        elapsed = inv.get("duration_ms", 0)
        if not elapsed:
            continue
        durations_by_tool.setdefault(name, []).append(elapsed)

    slowest = [
        ToolDuration(
            tool_name=name,
            total_duration_ms=sum(samples),
            call_count=len(samples),
            avg_duration_ms=sum(samples) / len(samples),
        )
        for name, samples in durations_by_tool.items()
    ]
    slowest.sort(key=lambda t: t.avg_duration_ms, reverse=True)

    # Repeated operations (same tool + same session > 3 times)
    calls_per_session: Dict[str, Dict[str, int]] = {}
    for inv in invocations:
        per_tool = calls_per_session.setdefault(inv.get("session_id", ""), {})
        name = inv.get("tool_name", "")
        per_tool[name] = per_tool.get(name, 0) + 1

    repeated = []
    for sid, per_tool in calls_per_session.items():
        for name, calls in per_tool.items():
            if calls > 3:
                # wasted_ms is a rough estimate: first recorded duration of
                # the tool times the calls beyond the first two.
                repeated.append(RepeatedOperation(
                    description=f"{name} called {calls} times",
                    count=calls,
                    sessions=[sid],
                    wasted_ms=durations_by_tool.get(name, [0])[0] * (calls - 2) if name in durations_by_tool else 0,
                ))

    # Inefficient sessions (low efficiency score)
    inefficient = [
        s.get("session_id", "") for s in scores
        if s.get("efficiency_score", 1.0) < 0.3
    ]

    return WasteAnalysis(
        slowest_tools=slowest[:10],
        repeated_operations=sorted(repeated, key=lambda r: r.count, reverse=True)[:10],
        inefficient_sessions=inefficient,
        shortcut_opportunities=[],
    )
|
||||
# ── Reflection Prompt ─────────────────────────────────────────────────
|
||||
|
||||
def _build_reflection_prompt(
    self,
    scores: List[dict],
    invocations: List[dict],
    signals: List[dict],
    errors: ErrorAnalysis,
    waste: WasteAnalysis,
    avg_score: float,
    code_analysis: Optional[CodeChangeAnalysis] = None,
) -> str:
    """Build the reflection prompt as structured JSON data.

    All analysis results are serialized as JSON so the model receives
    lossless data instead of pre-summarized text.

    Args:
        scores: ``session_scores`` rows for the period.
        invocations: ``tool_invocations`` rows for the period.
        signals: ``outcome_signals`` rows for the period.
        errors: Output of ``_analyze_errors``.
        waste: Output of ``_analyze_time_waste``.
        avg_score: Mean composite session score.
        code_analysis: Optional output of ``analyze_code_changes``.

    Returns:
        The filled prompt string (template + compact JSON data block).
    """
    # Load user prompt template (short: just overview + data placeholder)
    template_path = Path(__file__).parent / "prompts" / "reflection.md"
    if template_path.exists():
        template = template_path.read_text(encoding="utf-8")
    else:
        template = _DEFAULT_REFLECTION_PROMPT

    # Compute statistics
    total_invocations = len(invocations)
    success_rate = (
        sum(1 for i in invocations if i.get("success", True)) / total_invocations * 100
        if total_invocations else 100
    )

    # Period range (human-readable local timestamps)
    if scores:
        ts_min = min(s.get("created_at", 0) for s in scores)
        ts_max = max(s.get("created_at", 0) for s in scores)
        period_range = (
            f"{time.strftime('%m-%d %H:%M', time.localtime(ts_min))} ~ "
            f"{time.strftime('%m-%d %H:%M', time.localtime(ts_max))}"
        )
    else:
        period_range = "N/A"

    # Build structured data JSON — compact format to save tokens
    data = {}

    # 1. Sessions — compact: [score, completion, efficiency, cost, satisfaction, category]
    data["sessions"] = [
        [
            round(s.get("composite_score", 0), 2),
            round(s.get("completion_rate", 0), 2),
            round(s.get("efficiency_score", 0), 2),
            round(s.get("cost_efficiency", 0), 2),
            round(s.get("satisfaction_proxy", 0), 2),
            s.get("task_category", ""),
        ]
        for s in scores
    ]

    # 2. Tool usage — compact: {tool: [calls, failures, avg_ms]}
    tool_stats: Dict[str, List[int]] = {}
    for inv in invocations:
        tool = inv.get("tool_name", "")
        if tool not in tool_stats:
            tool_stats[tool] = [0, 0, 0]  # calls, failures, total_ms
        tool_stats[tool][0] += 1
        if not inv.get("success", True):
            tool_stats[tool][1] += 1
        tool_stats[tool][2] += inv.get("duration_ms", 0) or 0
    # Sorted by total time descending; avg guarded by max(calls, 1).
    data["tools"] = {
        t: [v[0], v[1], round(v[2] / max(v[0], 1))]
        for t, v in sorted(tool_stats.items(), key=lambda x: x[1][2], reverse=True)
    }

    # 3. Signals — compact: {type: count}
    signal_types = {}
    for s in signals:
        stype = s.get("signal_type", "unknown")
        signal_types[stype] = signal_types.get(stype, 0) + 1
    data["signals"] = signal_types

    # 4. Errors — only non-empty fields
    err_data = {}
    if errors.tool_failures:
        err_data["tool_failures"] = [
            f"{tf.tool_name}:{tf.error_type}x{tf.count}"
            for tf in errors.tool_failures
        ]
    if errors.retry_patterns:
        err_data["retries"] = [
            f"{rp.tool_name}x{rp.attempt_count}"
            for rp in errors.retry_patterns[:5]
        ]
    if errors.incomplete_sessions:
        err_data["incomplete"] = len(errors.incomplete_sessions)
    if errors.user_corrections:
        err_data["corrections"] = errors.user_corrections
    if errors.correction_examples:
        err_data["correction_examples"] = errors.correction_examples[:2]
    if errors.api_error_count:
        err_data["api_errors"] = errors.api_error_count
    if err_data:
        data["errors"] = err_data

    # 5. Waste — only non-empty
    waste_data = {}
    if waste.slowest_tools:
        waste_data["slowest"] = [
            f"{td.tool_name} {round(td.avg_duration_ms)}ms/{td.call_count}calls"
            for td in waste.slowest_tools[:5]
        ]
    if waste.repeated_operations:
        waste_data["repeated"] = [
            f"{ro.description} x{ro.count}"
            for ro in waste.repeated_operations[:3]
        ]
    if waste.inefficient_sessions:
        waste_data["inefficient"] = len(waste.inefficient_sessions)
    if waste_data:
        data["waste"] = waste_data

    # 6. Code changes — flat compact format (first 10 commits only)
    if code_analysis and code_analysis.commits:
        cc = code_analysis
        commits_data = []
        for c in cc.commits[:10]:
            entry = f"{c.hash_short} {c.subject} +{c.insertions}/-{c.deletions}"
            if c.file_list:
                entry += f" [{','.join(c.file_list[:5])}]"
            if c.body:
                entry += f" | {c.body[:150]}"
            commits_data.append(entry)
        data["code_changes"] = {
            "stats": f"{cc.total_commits} commits +{cc.total_insertions}/-{cc.total_deletions} lines {cc.total_files_changed} files",
            "categories": cc.change_categories,
            "areas": cc.areas_touched,
            "commits": commits_data,
        }

    data_json = json.dumps(data, ensure_ascii=False, indent=2)

    # Fill template — plain str.replace; placeholders are literal tokens,
    # so any unrecognized braces in the template pass through untouched.
    prompt = template.replace("{period_range}", period_range)
    prompt = prompt.replace("{sessions_count}", str(len(scores)))
    prompt = prompt.replace("{avg_score}", f"{avg_score:.3f}")
    prompt = prompt.replace("{total_invocations}", str(total_invocations))
    prompt = prompt.replace("{success_rate}", f"{success_rate:.1f}")
    prompt = prompt.replace("{data_json}", data_json)

    return prompt
|
||||
# ── Model Call ────────────────────────────────────────────────────────
|
||||
|
||||
def _call_model(self, prompt: str) -> Optional[str]:
    """Call the active model with automatic failover.

    Resolution order:
    1. Primary model (glm-5.1 via zai)
    2. Fallback model (Qwen3.6 via local) — if primary fails
    Health check: when on fallback, probes primary every 30 min
    and switches back when it recovers.

    Args:
        prompt: The user prompt; stashed on ``self._current_prompt`` so
            ``_call_chat_completions`` can read it.

    Returns:
        Model response text, or None when both primary and fallback fail
        or the runtime config is incomplete.
    """
    self._current_prompt = prompt

    # Which config is active (and whether it is already the fallback)
    # is decided by model_config, not here.
    active_cfg, is_fallback = _get_active_text_config(self.config)
    base_url = active_cfg.get("base_url", "")
    api_key = active_cfg.get("api_key", "")
    model = active_cfg.get("model", "")

    if not base_url or not model:
        # Only log the presence of base_url, not its value.
        logger.warning("Incomplete runtime config: base_url=%s model=%s",
                       bool(base_url), model)
        return None

    result = self._call_chat_completions(base_url, api_key, model)

    # If primary failed, try fallback (skipped when we are already on it)
    if result is None and not is_fallback:
        fallback = self.config.get("fallback", {})
        if fallback.get("base_url") and fallback.get("model"):
            logger.warning("Primary model failed, trying fallback: %s",
                           fallback.get("model"))
            result = self._call_chat_completions(
                fallback["base_url"], fallback.get("api_key", ""),
                fallback["model"],
            )
            if result is not None:
                # Persist the switch only after the fallback actually worked.
                _switch_to_fallback()

    return result
|
||||
def _call_chat_completions(
    self, base_url: str, api_key: str, model: str,
) -> Optional[str]:
    """Call OpenAI-compatible /chat/completions endpoint.

    Sends ``self._current_prompt`` with the module system prompt and
    returns the first choice's content, or None on any failure.
    """
    try:
        import requests

        endpoint = f"{base_url.rstrip('/')}/chat/completions"
        request_headers = {"Content-Type": "application/json"}
        if api_key:
            request_headers["Authorization"] = f"Bearer {api_key}"

        payload = {
            "model": model,
            "messages": [
                {"role": "system", "content": _SYSTEM_PROMPT},
                {"role": "user", "content": self._current_prompt or ""},
            ],
            "temperature": 0.3,
        }
        resp = requests.post(
            endpoint,
            headers=request_headers,
            json=payload,
            timeout=300,
        )
        if resp.status_code == 200:
            body = resp.json()
            return body.get("choices", [{}])[0].get("message", {}).get("content", "")
        logger.debug("Model call failed: %d %s", resp.status_code, resp.text[:200])
    except Exception as exc:
        # Network/JSON/shape problems all collapse to "call failed";
        # the caller handles failover.
        logger.debug("Chat completions call failed: %s", exc)
    return None
|
||||
# ── Multimodal Call ───────────────────────────────────────────────────
|
||||
|
||||
def call_multimodal(self, prompt: str, images: Optional[list] = None) -> Optional[str]:
    """Call multimodal model with text and optional images.

    Routes to local multimodal model (gemma-4-26b-a4b-it-4bit) when
    images are involved. Falls back to text model if no images.

    Args:
        prompt: Text prompt.
        images: List of image data, each item is either:
            - URL string (http/https/data:image)
            - bytes (raw image data, auto-encoded to base64)

    Returns:
        Model response text, or None on failure.
    """
    mm = self.config.get("multimodal", {})
    if not mm or not mm.get("base_url"):
        logger.debug("No multimodal model configured, falling back to text")
        return self._call_model(prompt)

    # Build content with images (OpenAI multimodal message format)
    content = [{"type": "text", "text": prompt}]
    for img in (images or []):
        if isinstance(img, bytes):
            import base64
            # Raw bytes are wrapped as a data URL; PNG is assumed for the
            # MIME type — TODO confirm callers only pass PNG data.
            b64 = base64.b64encode(img).decode()
            content.append({
                "type": "image_url",
                "image_url": {"url": f"data:image/png;base64,{b64}"},
            })
        elif isinstance(img, str):
            content.append({
                "type": "image_url",
                "image_url": {"url": img},
            })

    try:
        from openai import OpenAI
        # Append "/v1" only when the configured base_url lacks it.
        client = OpenAI(
            base_url=mm["base_url"].rstrip("/") + ("/v1" if not mm["base_url"].rstrip("/").endswith("/v1") else ""),
            api_key=mm.get("api_key") or "no-key",
        )
        resp = client.chat.completions.create(
            model=mm["model"],
            messages=[{"role": "user", "content": content}],
            temperature=0.3,
            max_tokens=2000,
            timeout=120,
        )
        return resp.choices[0].message.content
    except Exception as exc:
        logger.debug("Multimodal call failed: %s", exc)
        return None
|
||||
# ── Reflection Parsing ────────────────────────────────────────────────
|
||||
|
||||
def _parse_reflection(
    self,
    reflection_text: str,
    period_start: float,
    period_end: float,
    sessions_analyzed: int,
    avg_score: float,
    error_analysis: ErrorAnalysis,
    waste_analysis: WasteAnalysis,
    code_analysis: Optional[CodeChangeAnalysis] = None,
) -> ReflectionReport:
    """Parse model output into structured ReflectionReport.

    Extraction cascade:
    1. Direct JSON parse
    2. Strip markdown ```json ... ``` wrapper, retry JSON
    3. Extract JSON object via regex (handle trailing text)
    4. Text-based section extraction (fallback)

    Always returns a ReflectionReport; when every extraction stage fails
    the pattern/recommendation lists are simply empty.
    """
    worst_patterns = []
    best_patterns = []
    recommendations = []
    tool_insights = {}

    text = reflection_text.strip()

    # 1. Direct JSON parse
    data = _try_parse_json(text)

    if data is None:
        # 2. Strip markdown wrapper (non-greedy: first fenced object)
        m = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', text, re.DOTALL)
        if m:
            data = _try_parse_json(m.group(1))

    if data is None:
        # 3. Regex extract first JSON object mentioning an expected key
        # (only matches objects without nested braces)
        m = re.search(r'\{[^{}]*"(?:worst|best|recommendations)"[^{}]*\}', text, re.DOTALL)
        if m:
            data = _try_parse_json(m.group(0))

    if data is None:
        # 3.5. Broader fallback — take the outermost brace span
        start = text.find('{')
        end = text.rfind('}')
        if start != -1 and end > start:
            data = _try_parse_json(text[start:end + 1])

    if data is not None:
        # `or []` / `or {}` also normalizes explicit JSON nulls.
        worst_patterns = data.get("worst_patterns") or []
        best_patterns = data.get("best_patterns") or []
        recommendations = data.get("recommendations") or []
        tool_insights = data.get("tool_insights") or {}
    else:
        # 4. Text-based extraction: headings switch the current section,
        # bulleted/numbered lines are collected into it. Heading matches
        # cover both English and Chinese section titles.
        section = None
        for line in text.split("\n"):
            stripped = line.strip()
            lower = stripped.lower()
            if ("worst" in lower and "pattern" in lower) or "最差" in stripped or "错误模式" in stripped:
                section = "worst"
            elif ("best" in lower and "pattern" in lower) or "最佳" in stripped or "成功" in stripped:
                section = "best"
            elif ("recommend" in lower) or "建议" in stripped:
                section = "rec"
            elif stripped.startswith("- ") or stripped.startswith("* ") or stripped.startswith("• "):
                item = stripped.lstrip("-*• ").strip()
                if section == "worst":
                    worst_patterns.append(item)
                elif section == "best":
                    best_patterns.append(item)
                elif section == "rec":
                    recommendations.append(item)
            elif len(stripped) > 2 and stripped[0].isdigit() and stripped[1] in ".)" and stripped[2] == " ":
                # Numbered list item, e.g. "1. foo" / "2) bar"
                item = stripped[3:].strip()
                if section == "worst":
                    worst_patterns.append(item)
                elif section == "best":
                    best_patterns.append(item)
                elif section == "rec":
                    recommendations.append(item)

    return ReflectionReport(
        period_start=period_start,
        period_end=period_end,
        sessions_analyzed=sessions_analyzed,
        avg_score=avg_score,
        error_summary=error_analysis.summary(),
        waste_summary=waste_analysis.summary(),
        worst_patterns=worst_patterns,
        best_patterns=best_patterns,
        tool_insights=tool_insights,
        recommendations=recommendations,
        code_change_summary=code_analysis.summary() if code_analysis else "",
        # NOTE(review): reads the top-level "model" key of self.config;
        # _call_model resolves the model name via _get_active_text_config
        # instead — confirm these agree.
        model_used=self.config.get("model", "unknown"),
    )
|
||||
|
||||
# ── Default Prompt Template ──────────────────────────────────────────────
|
||||
|
||||
# System prompt (Chinese) telling the model to act as the Hermes Agent
# performance-analysis engine and emit strict JSON (no markdown) with keys
# worst_patterns / best_patterns / tool_insights / recommendations.
# Runtime string — kept verbatim; _parse_reflection parses the output.
_SYSTEM_PROMPT = (
    "你是 Hermes Agent 性能分析引擎。分析运行数据+代码变更,输出严格JSON(无markdown)。\n"
    "格式:\n"
    '{"worst_patterns":["模式(工具+场景+根因)"],"best_patterns":["成功经验"],'
    '"tool_insights":{"工具":{"sr":0.95,"ms":500,"rec":"建议"}},'
    '"recommendations":["做什么|效果|风险(l/m/h)|验证"]}\n'
    "重点:系统性错误>偶发,错误连锁,策略vs工具问题,重复操作,代码设计合理性,自我进化状态,"
    "可固化流程。≤5条建议,优先高影响低风险。无数据时输出空数组。"
)
||||
|
||||
|
||||
# Fallback user-prompt template (Chinese) used when prompts/reflection.md
# is missing. Placeholders are substituted by _build_reflection_prompt via
# str.replace: {period_range}, {sessions_count}, {avg_score},
# {total_invocations}, {success_rate}, {data_json}.
# Runtime string — content kept verbatim.
_DEFAULT_REFLECTION_PROMPT = """## 概况
- 时段: {period_range}
- Session 数: {sessions_count}, 平均质量: {avg_score}
- 工具调用: {total_invocations} 次, 成功率 {success_rate}%

## 数据
{data_json}
"""
||||
|
||||
|
||||
def _try_parse_json(text: str) -> Optional[dict]:
|
||||
"""Try to parse JSON, returning None on any failure."""
|
||||
try:
|
||||
data = json.loads(text)
|
||||
if isinstance(data, dict):
|
||||
return data
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
pass
|
||||
return None
|
||||
Loading…
Add table
Add a link
Reference in a new issue