feat: add self-evolution plugin — agent self-optimization system

Add a comprehensive self-evolution system that enables Hermes Agent
to continuously improve through automated analysis and optimization:

Core components:
- reflection_engine: Nightly session analysis (1:00 AM)
- evolution_proposer: Generate improvement proposals from insights
- quality_scorer: Multi-dimensional session quality evaluation
- strategy_injector: Inject learned strategies into new sessions
- strategy_compressor: Strategy optimization and deduplication
- git_analyzer: Code change pattern analysis
- rule_engine: Pattern-based rule generation
- feishu_notifier: Feishu card notifications for evolution events

Storage:
- db.py: SQLite telemetry storage
- strategy_store: Persistent strategy storage
- models.py: Data models

Plugin integration:
- plugin.yaml, hooks.py, __init__.py for plugin system
- cron_jobs.py for scheduled tasks

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
玉冰 2026-04-25 00:40:13 +08:00
parent e5d41f05d4
commit 3cd384dc43
23 changed files with 6173 additions and 0 deletions

View file

@ -0,0 +1,751 @@
"""
Self Evolution Plugin Dream Engine (Reflection Engine)
=========================================================
Runs nightly at 1:00 to analyze the previous day's sessions.
Design reference: Claude Code plugins/hookify/agents/conversation-analyzer.md
- Analyzes conversations in reverse chronological order
- Detects: corrections, frustrations, repeated issues, reversions
- Extracts tool usage patterns, converts to actionable rules
- Categorizes by severity
We extend this pattern with:
- Full automated analysis (not just on user request)
- Error analysis (tool failures, retries, API errors)
- Time waste analysis (slow tools, repeated ops, inefficient sessions)
"""
from __future__ import annotations
import json
import logging
import os
import re
import time
from pathlib import Path
from typing import Any, Dict, List, Optional
from self_evolution import db
from self_evolution.model_config import resolve_config, get_active_text_config, switch_to_fallback
from self_evolution.git_analyzer import analyze_code_changes
from self_evolution.models import (
ErrorAnalysis, ToolFailure, RetryPattern,
WasteAnalysis, ToolDuration, RepeatedOperation,
CodeChangeAnalysis, CommitInfo,
ReflectionReport,
)
logger = logging.getLogger(__name__)
# ── Backward-compatible aliases ────────────────────────────────────────────
# These are used by cron_jobs.py and other callers. They re-export the
# model_config helpers under the legacy names this module historically
# exposed; do not remove without updating those call sites.
_resolve_runtime_config = resolve_config
_get_active_text_config = get_active_text_config
_switch_to_fallback = switch_to_fallback
class DreamEngine:
    """Nightly dream consolidation engine.

    Analyzes the previous day's sessions to find:
    1. Error patterns (tool failures, retries, incomplete tasks)
    2. Time waste patterns (slow tools, repeated operations, inefficient flows)
    3. Success patterns (what worked well)
    4. Generates actionable evolution proposals
    """
    def __init__(self, config: Optional[dict] = None):
        """Initialize the engine.

        Args:
            config: Runtime model configuration dict. When None, it is
                resolved via ``_resolve_runtime_config()`` (alias of
                ``model_config.resolve_config``).
        """
        self.config = config or _resolve_runtime_config()
        # NOTE(review): _model_client is never read in this module's visible
        # code — presumably reserved for a cached client; confirm before removal.
        self._model_client = None
        # Last prompt passed to run()'s model call; read back by
        # _call_chat_completions when building the request body.
        self._current_prompt = ""
def run(self, hours: int = 24, max_runtime_seconds: int = 0) -> Optional[ReflectionReport]:
"""Main dream consolidation flow.
Args:
hours: Analyze data from the last N hours.
max_runtime_seconds: Hard timeout in seconds. 0 = no limit.
If exceeded, stops at the next step boundary and returns None.
"""
logger.info("Dream engine starting — analyzing last %d hours", hours)
deadline = time.time() + max_runtime_seconds if max_runtime_seconds > 0 else 0
now = time.time()
cutoff = now - (hours * 3600)
try:
# 1. Load session data
scores = db.fetch_all(
"session_scores",
where="created_at >= ?",
params=(cutoff,),
order_by="created_at DESC",
)
tool_invocations = db.fetch_all(
"tool_invocations",
where="created_at >= ?",
params=(cutoff,),
order_by="created_at DESC",
)
signals = db.fetch_all(
"outcome_signals",
where="created_at >= ?",
params=(cutoff,),
)
if not scores:
logger.info("No sessions to analyze")
return None
# 2. Error analysis
if deadline and time.time() > deadline:
logger.warning("Dream engine timed out before error analysis")
return None
error_analysis = self._analyze_errors(scores, tool_invocations, signals)
logger.info("Error analysis: %s", error_analysis.summary())
# 3. Time waste analysis
if deadline and time.time() > deadline:
logger.warning("Dream engine timed out before waste analysis")
return None
waste_analysis = self._analyze_time_waste(scores, tool_invocations)
logger.info("Waste analysis: %s", waste_analysis.summary())
# 3.5. Code change analysis
if deadline and time.time() > deadline:
logger.warning("Dream engine timed out before code analysis")
return None
code_analysis = analyze_code_changes(hours=hours)
logger.info("Code change analysis: %d commits found", code_analysis.total_commits)
# 4. Compute average score
avg_score = (
sum(s.get("composite_score", 0) for s in scores) / len(scores)
if scores else 0
)
# 5. Build reflection prompt
if deadline and time.time() > deadline:
logger.warning("Dream engine timed out before model call")
return None
prompt = self._build_reflection_prompt(
scores, tool_invocations, signals,
error_analysis, waste_analysis, avg_score,
code_analysis=code_analysis,
)
# 6. Call model for deep reflection
reflection_text = self._call_model(prompt)
if not reflection_text:
logger.warning("Model returned empty reflection")
return None
# 7. Parse reflection report
report = self._parse_reflection(
reflection_text=reflection_text,
period_start=cutoff,
period_end=now,
sessions_analyzed=len(scores),
avg_score=avg_score,
error_analysis=error_analysis,
waste_analysis=waste_analysis,
code_analysis=code_analysis,
)
# 8. Store report
report_id = db.insert("reflection_reports", report.to_db_row())
logger.info("Reflection report saved: id=%d, avg_score=%.3f", report_id, avg_score)
# 9. Generate evolution proposals
from self_evolution.evolution_proposer import generate_proposals
proposals = generate_proposals(report, report_id)
for p in proposals:
db.insert("evolution_proposals", p.to_db_row())
logger.info("Generated %d evolution proposals", len(proposals))
# 10. Compress existing strategies
try:
from self_evolution.strategy_compressor import compress_strategies
from self_evolution.strategy_store import StrategyStore
store = StrategyStore()
data = store.load()
rules = data.get("rules", [])
compressed = compress_strategies(rules)
if len(compressed) < len(rules):
data["rules"] = compressed
store.save(data)
logger.info("Strategies compressed: %d%d", len(rules), len(compressed))
except Exception as exc:
logger.warning("Strategy compression failed: %s", exc)
# 11. Cleanup old data
db.cleanup(days=30)
return report
except Exception as exc:
logger.exception("Dream engine failed: %s", exc)
return None
# ── Error Analysis ────────────────────────────────────────────────────
def _analyze_errors(
self,
scores: List[dict],
invocations: List[dict],
signals: List[dict],
) -> ErrorAnalysis:
"""Analyze all errors in the period.
Inspired by Claude Code conversation-analyzer's signal detection.
"""
# Tool failures
failures = {}
for inv in invocations:
if not inv.get("success", True):
tool = inv.get("tool_name", "unknown")
error_type = inv.get("error_type", "unknown")
key = f"{tool}:{error_type}"
if key not in failures:
failures[key] = ToolFailure(
tool_name=tool,
error_type=error_type,
count=0,
sessions_affected=[],
example_session=inv.get("session_id", ""),
)
failures[key].count += 1
sid = inv.get("session_id", "")
if sid and sid not in failures[key].sessions_affected:
failures[key].sessions_affected.append(sid)
# Retry patterns (same tool called > 2 times in same session)
retries = self._detect_retry_patterns(invocations)
# Incomplete sessions
incomplete = [
s.get("session_id", "") for s in scores
if s.get("completion_rate", 1.0) < 0.5
]
# User corrections from signals
corrections = [s for s in signals if s.get("signal_type") == "correction"]
frustration = [s for s in signals if s.get("signal_type") == "frustration"]
api_errors = [s for s in signals if s.get("signal_type") == "api_error"]
# API error type distribution
api_error_types: Dict[str, int] = {}
for s in api_errors:
meta = json.loads(s.get("metadata", "{}"))
etype = meta.get("error_type", "unknown")
api_error_types[etype] = api_error_types.get(etype, 0) + 1
return ErrorAnalysis(
tool_failures=sorted(failures.values(), key=lambda x: x.count, reverse=True),
retry_patterns=retries,
incomplete_sessions=incomplete,
user_corrections=len(corrections),
correction_examples=[s.get("metadata", "") for s in corrections[:3]],
api_error_count=len(api_errors),
api_error_types=api_error_types,
)
def _detect_retry_patterns(self, invocations: List[dict]) -> List[RetryPattern]:
"""Detect tools called > 2 times in same session."""
session_tools: Dict[str, Dict[str, int]] = {}
for inv in invocations:
sid = inv.get("session_id", "")
tool = inv.get("tool_name", "")
if sid not in session_tools:
session_tools[sid] = {}
session_tools[sid][tool] = session_tools[sid].get(tool, 0) + 1
patterns = []
for sid, tools in session_tools.items():
for tool, count in tools.items():
if count > 2:
patterns.append(RetryPattern(
session_id=sid,
tool_name=tool,
attempt_count=count,
final_outcome="unknown",
))
return sorted(patterns, key=lambda x: x.attempt_count, reverse=True)[:20]
# ── Time Waste Analysis ───────────────────────────────────────────────
    def _analyze_time_waste(
        self,
        scores: List[dict],
        invocations: List[dict],
    ) -> WasteAnalysis:
        """Analyze time waste patterns.

        Produces: the 10 slowest tools by average duration, the 10 most
        repeated (tool, session) operations, and sessions whose efficiency
        score fell below 0.3. ``shortcut_opportunities`` is always empty here.

        Args:
            scores: Rows from the ``session_scores`` table.
            invocations: Rows from the ``tool_invocations`` table.
        """
        # Slowest tools — collect per-tool duration samples, skipping
        # invocations with no (or zero) recorded duration
        tool_durations: Dict[str, List[int]] = {}
        for inv in invocations:
            tool = inv.get("tool_name", "")
            duration = inv.get("duration_ms", 0)
            if not duration:
                continue
            if tool not in tool_durations:
                tool_durations[tool] = []
            tool_durations[tool].append(duration)
        slowest = [
            ToolDuration(
                tool_name=tool,
                total_duration_ms=sum(durs),
                call_count=len(durs),
                avg_duration_ms=sum(durs) / len(durs),
            )
            for tool, durs in tool_durations.items()
        ]
        slowest.sort(key=lambda x: x.avg_duration_ms, reverse=True)
        # Repeated operations (same tool + same session > 3 times)
        session_tool_calls: Dict[str, Dict[str, int]] = {}
        for inv in invocations:
            sid = inv.get("session_id", "")
            tool = inv.get("tool_name", "")
            if sid not in session_tool_calls:
                session_tool_calls[sid] = {}
            session_tool_calls[sid][tool] = session_tool_calls[sid].get(tool, 0) + 1
        repeated = []
        for sid, tools in session_tool_calls.items():
            for tool, count in tools.items():
                if count > 3:
                    # NOTE(review): wasted_ms estimates using only the FIRST
                    # recorded duration sample for the tool, times (count - 2)
                    # "excess" calls — a rough heuristic; an average would be
                    # more representative. Confirm intent before changing.
                    repeated.append(RepeatedOperation(
                        description=f"{tool} called {count} times",
                        count=count,
                        sessions=[sid],
                        wasted_ms=tool_durations.get(tool, [0])[0] * (count - 2) if tool in tool_durations else 0,
                    ))
        # Inefficient sessions (low efficiency score)
        inefficient = [
            s.get("session_id", "") for s in scores
            if s.get("efficiency_score", 1.0) < 0.3
        ]
        return WasteAnalysis(
            slowest_tools=slowest[:10],
            repeated_operations=sorted(repeated, key=lambda x: x.count, reverse=True)[:10],
            inefficient_sessions=inefficient,
            shortcut_opportunities=[],
        )
# ── Reflection Prompt ─────────────────────────────────────────────────
    def _build_reflection_prompt(
        self,
        scores: List[dict],
        invocations: List[dict],
        signals: List[dict],
        errors: ErrorAnalysis,
        waste: WasteAnalysis,
        avg_score: float,
        code_analysis: Optional[CodeChangeAnalysis] = None,
    ) -> str:
        """Build the reflection prompt as structured JSON data.

        All analysis results are serialized as JSON so the model receives
        lossless data instead of pre-summarized text.

        Args:
            scores: Session score rows for the period.
            invocations: Tool invocation rows for the period.
            signals: Outcome signal rows for the period.
            errors: Result of :meth:`_analyze_errors`.
            waste: Result of :meth:`_analyze_time_waste`.
            avg_score: Mean composite score over *scores*.
            code_analysis: Optional git change analysis; section 6 is
                omitted when None or when it has no commits.

        Returns:
            The prompt string, with template placeholders substituted.
        """
        # Load user prompt template (short: just overview + data placeholder);
        # falls back to the in-module default when prompts/reflection.md is absent
        template_path = Path(__file__).parent / "prompts" / "reflection.md"
        if template_path.exists():
            template = template_path.read_text(encoding="utf-8")
        else:
            template = _DEFAULT_REFLECTION_PROMPT
        # Compute statistics (success rate defaults to 100 when no invocations)
        total_invocations = len(invocations)
        success_rate = (
            sum(1 for i in invocations if i.get("success", True)) / total_invocations * 100
            if total_invocations else 100
        )
        # Period range, formatted in local time as "MM-DD HH:MM ~ MM-DD HH:MM"
        if scores:
            ts_min = min(s.get("created_at", 0) for s in scores)
            ts_max = max(s.get("created_at", 0) for s in scores)
            period_range = (
                f"{time.strftime('%m-%d %H:%M', time.localtime(ts_min))} ~ "
                f"{time.strftime('%m-%d %H:%M', time.localtime(ts_max))}"
            )
        else:
            period_range = "N/A"
        # Build structured data JSON — compact format to save tokens
        data = {}
        # 1. Sessions — compact: [score, completion, efficiency, cost, satisfaction, category]
        data["sessions"] = [
            [
                round(s.get("composite_score", 0), 2),
                round(s.get("completion_rate", 0), 2),
                round(s.get("efficiency_score", 0), 2),
                round(s.get("cost_efficiency", 0), 2),
                round(s.get("satisfaction_proxy", 0), 2),
                s.get("task_category", ""),
            ]
            for s in scores
        ]
        # 2. Tool usage — compact: {tool: [calls, failures, avg_ms]},
        # ordered by total time spent (descending)
        tool_stats: Dict[str, List[int]] = {}
        for inv in invocations:
            tool = inv.get("tool_name", "")
            if tool not in tool_stats:
                tool_stats[tool] = [0, 0, 0]  # calls, failures, total_ms
            tool_stats[tool][0] += 1
            if not inv.get("success", True):
                tool_stats[tool][1] += 1
            tool_stats[tool][2] += inv.get("duration_ms", 0) or 0
        data["tools"] = {
            t: [v[0], v[1], round(v[2] / max(v[0], 1))]
            for t, v in sorted(tool_stats.items(), key=lambda x: x[1][2], reverse=True)
        }
        # 3. Signals — compact: {type: count}
        signal_types = {}
        for s in signals:
            stype = s.get("signal_type", "unknown")
            signal_types[stype] = signal_types.get(stype, 0) + 1
        data["signals"] = signal_types
        # 4. Errors — only non-empty fields are serialized
        err_data = {}
        if errors.tool_failures:
            err_data["tool_failures"] = [
                f"{tf.tool_name}:{tf.error_type}x{tf.count}"
                for tf in errors.tool_failures
            ]
        if errors.retry_patterns:
            err_data["retries"] = [
                f"{rp.tool_name}x{rp.attempt_count}"
                for rp in errors.retry_patterns[:5]
            ]
        if errors.incomplete_sessions:
            err_data["incomplete"] = len(errors.incomplete_sessions)
        if errors.user_corrections:
            err_data["corrections"] = errors.user_corrections
        if errors.correction_examples:
            err_data["correction_examples"] = errors.correction_examples[:2]
        if errors.api_error_count:
            err_data["api_errors"] = errors.api_error_count
        if err_data:
            data["errors"] = err_data
        # 5. Waste — only non-empty fields are serialized
        waste_data = {}
        if waste.slowest_tools:
            waste_data["slowest"] = [
                f"{td.tool_name} {round(td.avg_duration_ms)}ms/{td.call_count}calls"
                for td in waste.slowest_tools[:5]
            ]
        if waste.repeated_operations:
            waste_data["repeated"] = [
                f"{ro.description} x{ro.count}"
                for ro in waste.repeated_operations[:3]
            ]
        if waste.inefficient_sessions:
            waste_data["inefficient"] = len(waste.inefficient_sessions)
        if waste_data:
            data["waste"] = waste_data
        # 6. Code changes — flat compact format, capped at 10 commits
        if code_analysis and code_analysis.commits:
            cc = code_analysis
            commits_data = []
            for c in cc.commits[:10]:
                entry = f"{c.hash_short} {c.subject} +{c.insertions}/-{c.deletions}"
                if c.file_list:
                    entry += f" [{','.join(c.file_list[:5])}]"
                if c.body:
                    entry += f" | {c.body[:150]}"
                commits_data.append(entry)
            data["code_changes"] = {
                "stats": f"{cc.total_commits} commits +{cc.total_insertions}/-{cc.total_deletions} lines {cc.total_files_changed} files",
                "categories": cc.change_categories,
                "areas": cc.areas_touched,
            "commits": commits_data,
            }
        data_json = json.dumps(data, ensure_ascii=False, indent=2)
        # Fill template — plain str.replace, not str.format, so literal
        # braces elsewhere in the template are left untouched
        prompt = template.replace("{period_range}", period_range)
        prompt = prompt.replace("{sessions_count}", str(len(scores)))
        prompt = prompt.replace("{avg_score}", f"{avg_score:.3f}")
        prompt = prompt.replace("{total_invocations}", str(total_invocations))
        prompt = prompt.replace("{success_rate}", f"{success_rate:.1f}")
        prompt = prompt.replace("{data_json}", data_json)
        return prompt
# ── Model Call ────────────────────────────────────────────────────────
    def _call_model(self, prompt: str) -> Optional[str]:
        """Call the active model with automatic failover.

        Resolution order:
        1. Primary model via the active text config
        2. Fallback model from ``config["fallback"]`` if the primary fails

        NOTE(review): the "probes primary every 30 min and switches back"
        behavior described by the original docs lives in the model_config
        helpers (``_get_active_text_config`` / ``_switch_to_fallback``),
        not in this method — confirm against model_config.py.

        Args:
            prompt: User prompt; stored on ``self._current_prompt`` so
                ``_call_chat_completions`` can read it.

        Returns:
            Model response text, or None when config is incomplete or both
            primary and fallback calls fail.
        """
        self._current_prompt = prompt
        active_cfg, is_fallback = _get_active_text_config(self.config)
        base_url = active_cfg.get("base_url", "")
        api_key = active_cfg.get("api_key", "")
        model = active_cfg.get("model", "")
        if not base_url or not model:
            logger.warning("Incomplete runtime config: base_url=%s model=%s",
                           bool(base_url), model)
            return None
        result = self._call_chat_completions(base_url, api_key, model)
        # If primary failed, try fallback (only when we are not already on it)
        if result is None and not is_fallback:
            fallback = self.config.get("fallback", {})
            if fallback.get("base_url") and fallback.get("model"):
                logger.warning("Primary model failed, trying fallback: %s",
                               fallback.get("model"))
                result = self._call_chat_completions(
                    fallback["base_url"], fallback.get("api_key", ""),
                    fallback["model"],
                )
                # Record the switch only when the fallback actually answered
                if result is not None:
                    _switch_to_fallback()
        return result
def _call_chat_completions(
self, base_url: str, api_key: str, model: str,
) -> Optional[str]:
"""Call OpenAI-compatible /chat/completions endpoint."""
try:
import requests
url = f"{base_url.rstrip('/')}/chat/completions"
headers = {"Content-Type": "application/json"}
if api_key:
headers["Authorization"] = f"Bearer {api_key}"
resp = requests.post(
url,
headers=headers,
json={
"model": model,
"messages": [
{"role": "system", "content": _SYSTEM_PROMPT},
{"role": "user", "content": self._current_prompt or ""},
],
"temperature": 0.3,
},
timeout=300,
)
if resp.status_code == 200:
data = resp.json()
return data.get("choices", [{}])[0].get("message", {}).get("content", "")
else:
logger.debug("Model call failed: %d %s", resp.status_code, resp.text[:200])
except Exception as exc:
logger.debug("Chat completions call failed: %s", exc)
return None
# ── Multimodal Call ───────────────────────────────────────────────────
    def call_multimodal(self, prompt: str, images: Optional[list] = None) -> Optional[str]:
        """Call multimodal model with text and optional images.

        Routes to the model configured under ``config["multimodal"]`` when
        one is set; otherwise falls back to the text model via _call_model.

        Args:
            prompt: Text prompt.
            images: List of image data, each item is either:
                - URL string (http/https/data:image)
                - bytes (raw image data, auto-encoded to base64 PNG data URL)
        Returns:
            Model response text, or None on failure.
        """
        mm = self.config.get("multimodal", {})
        if not mm or not mm.get("base_url"):
            logger.debug("No multimodal model configured, falling back to text")
            return self._call_model(prompt)
        # Build OpenAI-style multi-part content: text first, then images
        content = [{"type": "text", "text": prompt}]
        for img in (images or []):
            if isinstance(img, bytes):
                import base64
                b64 = base64.b64encode(img).decode()
                content.append({
                    "type": "image_url",
                    "image_url": {"url": f"data:image/png;base64,{b64}"},
                })
            elif isinstance(img, str):
                content.append({
                    "type": "image_url",
                    "image_url": {"url": img},
                })
        try:
            # Third-party client, imported lazily so the module loads without it
            from openai import OpenAI
            client = OpenAI(
                # Append "/v1" only when the configured base_url lacks it
                base_url=mm["base_url"].rstrip("/") + ("/v1" if not mm["base_url"].rstrip("/").endswith("/v1") else ""),
                api_key=mm.get("api_key") or "no-key",
            )
            resp = client.chat.completions.create(
                model=mm["model"],
                messages=[{"role": "user", "content": content}],
                temperature=0.3,
                max_tokens=2000,
                timeout=120,
            )
            return resp.choices[0].message.content
        except Exception as exc:
            logger.debug("Multimodal call failed: %s", exc)
            return None
# ── Reflection Parsing ────────────────────────────────────────────────
def _parse_reflection(
self,
reflection_text: str,
period_start: float,
period_end: float,
sessions_analyzed: int,
avg_score: float,
error_analysis: ErrorAnalysis,
waste_analysis: WasteAnalysis,
code_analysis: CodeChangeAnalysis = None,
) -> ReflectionReport:
"""Parse model output into structured ReflectionReport.
Extraction cascade:
1. Direct JSON parse
2. Strip markdown ```json ... ``` wrapper, retry JSON
3. Extract JSON object via regex (handle trailing text)
4. Text-based section extraction (fallback)
"""
worst_patterns = []
best_patterns = []
recommendations = []
tool_insights = {}
text = reflection_text.strip()
# 1. Direct JSON parse
data = _try_parse_json(text)
if data is None:
# 2. Strip markdown wrapper
m = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', text, re.DOTALL)
if m:
data = _try_parse_json(m.group(1))
if data is None:
# 3. Regex extract first JSON object
m = re.search(r'\{[^{}]*"(?:worst|best|recommendations)"[^{}]*\}', text, re.DOTALL)
if m:
data = _try_parse_json(m.group(0))
if data is None:
# 3.5. Broader regex — find outermost braces
start = text.find('{')
end = text.rfind('}')
if start != -1 and end > start:
data = _try_parse_json(text[start:end + 1])
if data is not None:
worst_patterns = data.get("worst_patterns") or []
best_patterns = data.get("best_patterns") or []
recommendations = data.get("recommendations") or []
tool_insights = data.get("tool_insights") or {}
else:
# 4. Text-based extraction
section = None
for line in text.split("\n"):
stripped = line.strip()
lower = stripped.lower()
if ("worst" in lower and "pattern" in lower) or "最差" in stripped or "错误模式" in stripped:
section = "worst"
elif ("best" in lower and "pattern" in lower) or "最佳" in stripped or "成功" in stripped:
section = "best"
elif ("recommend" in lower) or "建议" in stripped:
section = "rec"
elif stripped.startswith("- ") or stripped.startswith("* ") or stripped.startswith(""):
item = stripped.lstrip("-*• ").strip()
if section == "worst":
worst_patterns.append(item)
elif section == "best":
best_patterns.append(item)
elif section == "rec":
recommendations.append(item)
elif len(stripped) > 2 and stripped[0].isdigit() and stripped[1] in ".)" and stripped[2] == " ":
item = stripped[3:].strip()
if section == "worst":
worst_patterns.append(item)
elif section == "best":
best_patterns.append(item)
elif section == "rec":
recommendations.append(item)
return ReflectionReport(
period_start=period_start,
period_end=period_end,
sessions_analyzed=sessions_analyzed,
avg_score=avg_score,
error_summary=error_analysis.summary(),
waste_summary=waste_analysis.summary(),
worst_patterns=worst_patterns,
best_patterns=best_patterns,
tool_insights=tool_insights,
recommendations=recommendations,
code_change_summary=code_analysis.summary() if code_analysis else "",
model_used=self.config.get("model", "unknown"),
)
# ── Default Prompt Template ──────────────────────────────────────────────
# System prompt (Chinese): instructs the model to act as the Hermes Agent
# performance-analysis engine and emit strict JSON (no markdown) with keys
# worst_patterns / best_patterns / tool_insights / recommendations, at most
# 5 recommendations, empty arrays when there is no data.
_SYSTEM_PROMPT = (
    "你是 Hermes Agent 性能分析引擎。分析运行数据+代码变更输出严格JSON无markdown\n"
    "格式:\n"
    '{"worst_patterns":["模式(工具+场景+根因)"],"best_patterns":["成功经验"],'
    '"tool_insights":{"工具":{"sr":0.95,"ms":500,"rec":"建议"}},'
    '"recommendations":["做什么|效果|风险(l/m/h)|验证"]}\n'
    "重点:系统性错误>偶发,错误连锁,策略vs工具问题,重复操作,代码设计合理性,自我进化状态,"
    "可固化流程。≤5条建议,优先高影响低风险。无数据时输出空数组。"
)
# Fallback user-prompt template (Chinese) used when prompts/reflection.md is
# absent. The {placeholders} are filled by _build_reflection_prompt via
# str.replace, not str.format.
_DEFAULT_REFLECTION_PROMPT = """## 概况
- 时段: {period_range}
- Session : {sessions_count}, 平均质量: {avg_score}
- 工具调用: {total_invocations} , 成功率 {success_rate}%
## 数据
{data_json}
"""
def _try_parse_json(text: str) -> Optional[dict]:
"""Try to parse JSON, returning None on any failure."""
try:
data = json.loads(text)
if isinstance(data, dict):
return data
except (json.JSONDecodeError, ValueError):
pass
return None