diff --git a/docs/self-evolution-design.html b/docs/self-evolution-design.html new file mode 100644 index 0000000000..5736299e17 --- /dev/null +++ b/docs/self-evolution-design.html @@ -0,0 +1,911 @@ + + + + + +Hermes Agent 自我优化与持续进化系统设计 + + + + + +
+

Hermes Agent 自我优化与持续进化系统

+

一套完全插件化的 agent 自我进化机制 — 通过每日"梦境整理"和"飞书审批流"实现闭环自我优化

+
+ 零侵入核心代码 + 完全插件化 + GLM-5.1 / Qwen 降级 + 飞书审批流 +
+
+ + +

核心架构:五层闭环

+

观察 → 评估 → 反思 → 学习 → 进化,形成持续自我改进的闭环循环。

+ +
+
+
+ 📡 + 观察 + 遥测采集
post_tool_call
+
+ +
+ 📊 + 评估 + 质量评分
on_session_end
+
+ +
+ 🌙 + 反思 + 梦境整理
凌晨 1:00
+
+ +
+ 🧠 + 学习 + 进化提案
策略生成
+
+ +
+ 🚀 + 进化 + 飞书审批 → 执行
19:00 推送
+
+ +
+ 💾 + 存储 + evolution.db
strategies.json
+
+
+
+ + +

每日流程

+

从凌晨梦境整理到晚间飞书推送,一天的自动进化循环。

+ +
+
+
01:00 — 梦境整理(自动执行)
+
DreamEngine.run() — 分析前日全部 session
+
+
+
1
+
+
数据汇总
+
读取 state.db(只读)+ evolution.db,计算各 session 质量评分
+
+
+
+
2
+
+
错误分析(重点)
+
    +
  • 工具调用失败统计(按工具、按错误类型分布)
  • +
  • 反复重试检测(同一工具同一 session 调用 > 2次)
  • +
  • 未完成 session、用户纠正消息、API 错误
  • +
  • 错误连锁分析(一个失败是否引发后续失败)
  • +
+
+
+
+
3
+
+
时间浪费分析(重点)
+
    +
  • 耗时最长的工具调用 TOP 10
  • +
  • 重复操作(多次读同一文件、重复搜索)
  • +
  • 低效 session(迭代轮数过多、工具调用过多)
  • +
  • 可缩短的工具调用链
  • +
+
+
+
+
4
+
+
深度反思(GLM-5.1 优先 / Qwen 降级)
+
将分析结果发送到反思模型(GLM-5.1 优先,不可用时降级本地 Qwen),产出结构化 ReflectionReport:错误根因 + 浪费根因 + 可操作建议
+
+
+
+
5
+
+
模式识别 + 生成进化提案
+
高成功率模式 → 候选技能 | 重复错误 → 候选规避策略 | 系统性浪费 → 候选流程优化
+
+
+
+
+ +
+
19:00 — 飞书推送进化方案
+
FeishuNotifier.send_daily_report()
+
读取当日凌晨产出的 pending_approval 提案,格式化为飞书交互卡片推送给用户。
+
+ +
+
用户审批后 — 执行进化
+
EvolutionExecutor.execute()
+
飞书回调触发执行:技能创建 / 策略调整 / 记忆更新 / 工具偏好变更。执行后自动创建 A/B 测试追踪单元。
+
+
+ + +

飞书卡片消息预览

+
+
+
🌅
+
+
Hermes 每日进化报告 (2026-04-18)
+
+
+
+
📊 前日概况
+
完成 sessions23
+
平均质量评分0.78 ↑0.03
+
工具调用 / 成功率156次 / 91%
+
+
+
❌ 错误分析
+
browser_tool 失败5次 (超时3次)
+
未完成 session2个
+
用户纠正3次
+
+
+
⏱️ 时间浪费分析
+
重复读取同一文件8次
+
web_search→browser 冗余6次
+
平均迭代轮数12轮 (理想8轮)
+
+
+
+
📋 进化提案 (3项)
+
+
[1] 🛠️ 创建技能: web_search_pipeline
+
预期: 搜索任务成功率 +15% | 风险: low
+
+ + + +
+
+
+
[2] ⚡ 策略调整: 优先 grep 替代 find
+
预期: 文件搜索效率 +25% | 风险: low
+
+ + + +
+
+
+
[3] 🧠 记忆更新: 用户偏好中文回复
+
预期: 用户满意度提升 | 风险: low
+
+ + + +
+
+
+
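上面的卡片预览可由类似下面的载荷组装(示意草稿:`msg_type="interactive"` 的卡片结构参考飞书开放平台消息卡片的通用写法,具体字段以 FeishuNotifier 实际实现与飞书文档为准,`p["id"]` 等提案字段名为假设):

```python
# 飞书交互卡片载荷组装示意(字段细节为假设,非 FeishuNotifier 原样实现)

def build_daily_card(date: str, proposals: list) -> dict:
    elements = []
    for i, p in enumerate(proposals, 1):
        # 每个提案:一段 lark_md 描述 + 一行批准/拒绝按钮
        elements.append({
            "tag": "div",
            "text": {"tag": "lark_md",
                     "content": f"**[{i}] {p['title']}**\n预期: {p['impact']} | 风险: {p['risk']}"},
        })
        elements.append({
            "tag": "action",
            "actions": [
                {"tag": "button", "text": {"tag": "plain_text", "content": "✅ 批准"},
                 "type": "primary", "value": {"proposal_id": p["id"], "action": "approve"}},
                {"tag": "button", "text": {"tag": "plain_text", "content": "❌ 拒绝"},
                 "type": "danger", "value": {"proposal_id": p["id"], "action": "reject"}},
            ],
        })
    return {
        "msg_type": "interactive",
        "card": {
            "header": {"title": {"tag": "plain_text",
                                 "content": f"🌅 Hermes 每日进化报告 ({date})"}},
            "elements": elements,
        },
    }
```

审批按钮的 `value` 会随飞书回调原样带回,EvolutionExecutor 据此定位提案并执行或拒绝。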
+ + +

质量评分体系

+

每个 session 结束时自动计算复合质量评分,零 API 成本。

+ +
+ session_quality = + 0.40 × completion_rate + + 0.20 × efficiency_score + + 0.15 × cost_efficiency + + 0.25 × satisfaction_proxy +
+ +
+
+

completion_rate 权重 0.40

+

任务是否完成。completed=1.0, interrupted=0.5, failed=0.0

+
+
+

efficiency_score 权重 0.20

+

迭代效率。理想轮数 / 实际轮数,上限 1.0

+
+
+

cost_efficiency 权重 0.15

+

工具使用效率。期望调用数 / 实际调用数,上限 1.0

+
+
+

satisfaction_proxy 权重 0.25

+

满意度代理。单轮完成=0.9, 多轮完成=0.75, 预算耗尽=-0.2

+
+
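上面的复合公式可以直接落成一小段 Python(示意草稿:参数名与各分支判定均为假设,实际实现以 quality_scorer.py 为准):

```python
# session 质量评分示意:四个分量加权求和,权重同上文公式

def session_quality(status: str, actual_rounds: int, ideal_rounds: int,
                    actual_calls: int, expected_calls: int,
                    single_turn: bool, budget_exhausted: bool) -> float:
    # completion_rate: completed=1.0, interrupted=0.5, failed=0.0
    completion = {"completed": 1.0, "interrupted": 0.5, "failed": 0.0}.get(status, 0.0)
    # efficiency_score: 理想轮数 / 实际轮数,上限 1.0
    efficiency = min(1.0, ideal_rounds / actual_rounds) if actual_rounds else 0.0
    # cost_efficiency: 期望调用数 / 实际调用数,上限 1.0
    cost = min(1.0, expected_calls / actual_calls) if actual_calls else 0.0
    # satisfaction_proxy: 单轮完成=0.9, 多轮完成=0.75, 预算耗尽=-0.2
    satisfaction = -0.2 if budget_exhausted else (0.9 if single_turn else 0.75)
    return (0.40 * completion + 0.20 * efficiency
            + 0.15 * cost + 0.25 * satisfaction)
```

全部分量可由本地遥测数据算出,不需要额外 API 调用,这正是"零 API 成本"的含义。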
+ + +

Claude Code 设计参考

+

本方案借鉴了 Claude Code 开源项目中的四个核心设计模式。

+ +
+
+
plugins/hookify/agents/conversation-analyzer.md
+
梦境整理 ← conversation-analyzer
+
+ 分析对话历史 → 识别纠正/沮丧/重复问题信号 → 提取可匹配正则规则 → 按严重程度分级(高/中/低)。 +

我们的扩展:从手动触发升级为每日自动运行,增加错误分析和时间浪费分析。 +
+
+
+
plugins/ralph-wiggum/
+
进化执行 ← Ralph Wiggum
+
+ 自我引用反馈环:Stop hook 拦截退出 → 重喂 prompt → agent 看到自己的修改 → 自动迭代直到满足条件。 +

我们的扩展:进化执行后创建验证追踪单元(类似 completion_promise),不满足条件自动回滚。 +
+
+
+
plugins/learning-output-style/
+
策略注入 ← SessionStart hook
+
+ 通过 SessionStart hook 在每个 session 自动注入行为上下文,等效于 CLAUDE.md 但更灵活。 +

我们的扩展:使用 pre_llm_call 钩子注入已学习的行为提示,完全隔离于核心代码。 +
+
+
+
plugins/hookify/core/rule_engine.py
+
规则引擎 ← rule_engine
+
+ LRU 缓存编译正则(128 上限),支持 regex_match/contains/equals/not_contains,区分 block/warn 级别。 +

我们的扩展:策略注入条件化,根据 session 特征(平台/任务类型/模型)匹配最相关规则。 +
+
+
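rule_engine 的核心模式(LRU 缓存编译正则 + 四种匹配算子)可以用几行 Python 草拟(算子语义按上文描述假设,`rule` 的字段名为演示用,并非上游源码原样):

```python
import functools
import re

@functools.lru_cache(maxsize=128)  # 128 上限的编译正则缓存
def _compiled(pattern: str) -> re.Pattern:
    return re.compile(pattern)

def matches(rule: dict, text: str) -> bool:
    """按算子匹配单条规则;rule 形如 {"op": "contains", "value": "..."}(字段名为假设)。"""
    op, value = rule["op"], rule["value"]
    if op == "regex_match":
        return bool(_compiled(value).search(text))
    if op == "contains":
        return value in text
    if op == "equals":
        return text == value
    if op == "not_contains":
        return value not in text
    return False
```

block/warn 分级只影响命中后的处理方式(拦截还是提示),匹配本身与分级无关。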
+ + +

隔离策略:零侵入核心代码

+

所有功能以插件形式实现,通过钩子集成,不修改任何上游核心文件。

+ +
+
+

插件文件结构

+
+self_evolution/ +├── plugin.yaml +├── __init__.py # register(ctx) +├── db.py # 独立 SQLite +├── hooks.py # 3个钩子 +├── quality_scorer.py # 质量评分 +├── reflection_engine.py # 梦境整理 +├── rule_engine.py # 条件匹配 +├── evolution_proposer.py +├── evolution_executor.py +├── feishu_notifier.py +├── strategy_injector.py +├── strategy_store.py +├── cron_jobs.py +├── models.py +├── agents/ +│ ├── dream_analyzer.md +│ └── evolution_planner.md +└── prompts/ + └── reflection.md +
+
+
+

钩子集成方式

+ + + + + + + + + + +
功能集成方式修改核心
工具调用遥测post_tool_callNO
Session 评分on_session_endNO
策略注入pre_llm_callNO
定时任务cron/jobs.jsonNO
飞书通知gateway/ 飞书网关NO
技能创建skill_manager_toolNO
记忆更新memory_toolNO
历史数据state.db 只读NO
+
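上表的钩子集成可以草拟为如下注册流程(`HookContext` 是演示用最小替身,真实 ctx 由 Hermes PluginManager 传入,`register_hook` 接口名为假设;三个钩子名取自上表):

```python
class HookContext:
    """最小化的钩子上下文替身(仅供演示,真实对象由 PluginManager 提供)。"""
    def __init__(self):
        self.hooks = {}

    def register_hook(self, name, fn):
        self.hooks.setdefault(name, []).append(fn)


def on_post_tool_call(event):
    # 遥测:写入 tool_invocations(session_id, tool_name, duration_ms, success, error_type)
    return None

def on_session_end(event):
    # 评分:计算 composite_score 并写入 session_scores
    return None

def on_pre_llm_call(event):
    # 策略注入:把已学习的 hint_text 附加到本次调用的上下文
    return None


def register_all(ctx):
    """对应 hooks.py 的注册入口:三个钩子全部只读核心状态,写入独立 evolution.db。"""
    ctx.register_hook("post_tool_call", on_post_tool_call)
    ctx.register_hook("on_session_end", on_session_end)
    ctx.register_hook("pre_llm_call", on_pre_llm_call)
```

核心代码无需任何修改:插件卸载后,这三个回调随之消失,系统行为回到原状。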
+
+ + +

独立数据库设计

+

独立于核心 state.db,7 张表存储于 ~/.hermes/self_evolution/evolution.db

+ +
+
+
tool_invocations
+
session_id TEXT
+
tool_name TEXT
+
duration_ms INT
+
success BOOL
+
error_type TEXT
+
+
+
session_scores
+
session_id TEXT PK
+
composite_score REAL
+
completion_rate REAL
+
efficiency_score REAL
+
task_category TEXT
+
+
+
outcome_signals
+
session_id TEXT
+
signal_type TEXT
+
signal_value REAL
+
metadata TEXT JSON
+
+
+
reflection_reports
+
sessions_analyzed INT
+
avg_score REAL
+
error_summary TEXT
+
worst_patterns TEXT JSON
+
recommendations TEXT JSON
+
+
+
evolution_proposals
+
id TEXT PK
+
proposal_type TEXT
+
title, description TEXT
+
status TEXT pending→approved→executed
+
+
+
improvement_units
+
proposal_id TEXT FK
+
baseline_score REAL
+
current_score REAL
+
status TEXT active→promoted / reverted
+
+
+
strategy_versions
+
version INT
+
strategies_json TEXT
+
avg_score REAL
+
active_from / active_until REAL
+
+
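以 tool_invocations 为例,上述表结构的最小可运行示意如下(字段取自上文卡片,完整 schema 见 db.py);梦境整理阶段用一条 GROUP BY 即可统计各工具的失败分布:

```python
import sqlite3

# 内存库演示;真实库位于 ~/.hermes/self_evolution/evolution.db
conn = sqlite3.connect(":memory:")
conn.execute("""
    CREATE TABLE tool_invocations (
        id          INTEGER PRIMARY KEY AUTOINCREMENT,
        session_id  TEXT NOT NULL,
        tool_name   TEXT NOT NULL,
        duration_ms INTEGER,
        success     BOOLEAN NOT NULL,
        error_type  TEXT
    )
""")
conn.executemany(
    "INSERT INTO tool_invocations (session_id, tool_name, duration_ms, success, error_type) "
    "VALUES (?, ?, ?, ?, ?)",
    [("s1", "browser_tool", 3200, 0, "timeout"),
     ("s1", "browser_tool", 900, 1, None)],
)
# 错误分析:按工具统计调用总数与失败次数
rows = conn.execute("""
    SELECT tool_name, COUNT(*) AS total, SUM(success = 0) AS failures
    FROM tool_invocations
    GROUP BY tool_name
""").fetchall()
```

同样的聚合思路可扩展到按 error_type 分布、按 duration_ms 排 TOP 10 等分析。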
+ + +

安全机制:防止退化漂移

+

六层防护确保进化方向正确且可回滚。

+ +
+
+
🗄️
+
独立数据库
+
不碰 state.db,上游 schema 变更无影响
+
+
+
🔒
+
只读核心
+
所有集成通过钩子完成,不修改核心文件
+
+
+
🚧
+
人工闸门
+
进化方案必须通过飞书审批,不自动执行
+
+
+
+
版本回滚
+
策略变更版本化,评分连续下降自动回滚
+
+
+
🛡️
+
有界变更
+
只能写 PERFORMANCE.md、创建 learned skills
+
+
+
📚
+
拒绝学习
+
被拒绝的提案会被分析,避免重复提出
+
+
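其中"版本回滚"依赖 improvement_units 的阈值判定,可草拟如下(字段取自数据库设计一节;`should_revert` / `should_promote` 的具体判定式为假设,实际逻辑见 models.py):

```python
from dataclasses import dataclass

@dataclass
class ImprovementUnit:
    baseline_score: float        # 变更执行时的基线均分
    current_score: float         # 变更后 session 的滚动均分
    sessions_sampled: int        # 变更后采样到的 session 数
    min_sessions: int = 10       # 判定前的最小样本量
    min_improvement: float = 0.05
    max_regression: float = 0.10

    @property
    def should_revert(self) -> bool:
        # 样本足够且相对基线下滑超过 max_regression → 自动回滚
        return (self.sessions_sampled >= self.min_sessions
                and self.current_score < self.baseline_score - self.max_regression)

    @property
    def should_promote(self) -> bool:
        # 样本足够且提升达到 min_improvement → 固化为永久策略
        return (self.sessions_sampled >= self.min_sessions
                and self.current_score >= self.baseline_score + self.min_improvement)
```

两个阈值之间是"观察区":既不回滚也不固化,继续采样直到信号明确。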
+ + +

实施路径

+

四个阶段,每阶段约 1 周。

+ +
+
+
01
+
基础设施
+ +
+
+
02
+
梦境整理
+ +
+
+
03
+
飞书审批
+ +
+
+
04
+
进化执行
+ +
+
+ + +

模型配置

+
+
+# ~/.hermes/self_evolution/config.yaml +model: + primary: + provider: "zhipu" # 优先使用 GLM-5.1 + model: "glm-5.1" + fallback: + provider: "ollama" # GLM 不可用时降级到本地 Qwen + model: "qwen3:32b" + base_url: "http://localhost:11434" + +schedule: + dream_time: "0 1 * * *" # 凌晨 1:00 + propose_time: "0 19 * * *" # 当日 19:00 +
+
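与该配置对应的主/备模型解析可草拟为(`is_available` 探活函数是假设的注入参数,真实解析逻辑见 reflection_engine 的 `_resolve_runtime_config`):

```python
# 主/备模型降级解析示意(CONFIG 镜像上面的 config.yaml)

CONFIG = {
    "model": {
        "primary": {"provider": "zhipu", "model": "glm-5.1"},
        "fallback": {"provider": "ollama", "model": "qwen3:32b",
                     "base_url": "http://localhost:11434"},
    }
}

def resolve_model(config: dict, is_available) -> dict:
    """优先 primary;探活失败时降级到 fallback(本地 Qwen)。"""
    primary = config["model"]["primary"]
    if is_available(primary["provider"]):
        return primary
    return config["model"]["fallback"]
```

把探活函数作为参数注入,便于在测试里模拟 GLM 不可用的场景。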
+ + +
+

Hermes Agent Self-Evolution System — Designed with reference to Claude Code open-source patterns

+

conversation-analyzer · Ralph Wiggum · learning-output-style · rule_engine

+
+ + + diff --git a/self_evolution/__init__.py b/self_evolution/__init__.py new file mode 100644 index 0000000000..d2b4001c2b --- /dev/null +++ b/self_evolution/__init__.py @@ -0,0 +1,43 @@ +""" +Self Evolution Plugin +===================== + +Agent self-optimization and continuous evolution system. + +Architecture: + - Telemetry: collects tool/session data via hooks + - Quality Scorer: evaluates session outcomes + - Dream Engine: nightly reflection at 1:00 + - Evolution Proposer: generates improvement proposals + - Feishu Notifier: pushes proposals at 19:00 for user approval + - Evolution Executor: applies approved changes with rollback support + - Strategy Injector: injects learned hints into sessions + +Design references from Claude Code: + - conversation-analyzer (hookify): dream analysis pattern + - Ralph Wiggum: iterative evolution with rollback + - learning-output-style: session-start strategy injection + - rule_engine (hookify): conditional strategy matching +""" + +from __future__ import annotations + +import logging + +logger = logging.getLogger(__name__) + + +def register(ctx) -> None: + """Plugin entry point — called by Hermes PluginManager. + + Registers: + - 3 hooks: post_tool_call, on_session_end, pre_llm_call + - 3 slash commands: /evolve, /reflect, /evolution_status + """ + from self_evolution.db import init_db + init_db() + + from self_evolution.hooks import register_all as register_hooks + register_hooks(ctx) + + logger.info("self_evolution plugin loaded: 3 hooks, telemetry active") diff --git a/self_evolution/agents/dream_analyzer.md b/self_evolution/agents/dream_analyzer.md new file mode 100644 index 0000000000..60a631923f --- /dev/null +++ b/self_evolution/agents/dream_analyzer.md @@ -0,0 +1,82 @@ +--- +name: dream_analyzer +description: > + 用于每日梦境整理的分析 agent。 + 分析前日所有 session 的工具调用、错误模式、时间浪费, + 产出结构化的反思报告和进化提案。 +model: inherit +tools: ["Read", "Grep"] +--- + +你是 Hermes Agent 的性能分析专家。你的任务是分析 agent 的运行数据,识别问题和优化机会。 + +## 分析流程 + +### 1. 
错误信号检测 + +参考 Claude Code conversation-analyzer 的模式,搜索以下信号: + +**显式纠正信号:** +- 用户消息包含 "不对"、"错误"、"重试"、"不要" +- 用户消息包含 "stop"、"wrong"、"retry"、"don't" + +**沮丧反应信号:** +- "为什么你做了X?"、"那不是我说的" +- "太慢了"、"浪费时间" + +**用户回退信号:** +- 用户撤销了 agent 的修改 +- 用户手动修复了 agent 的问题 + +**重复问题:** +- 同类错误在多个 session 中出现 + +### 2. 错误严重程度分级 + +**高严重度(应创建规避规则):** +- 系统性工具失败(同一工具多次失败) +- 安全相关问题 +- 数据丢失风险 + +**中严重度(应警告):** +- 效率问题(重复操作、不必要的步骤) +- 风格不一致 +- 非关键错误 + +**低严重度(可选优化):** +- 用户偏好 +- 非关键的模式改进 + +### 3. 时间浪费分析 + +重点分析: +- 耗时最长的工具调用 +- 重复操作(多次读同一文件、重复搜索) +- 工具调用链中的不必要步骤 +- 迭代轮数过多的 session + +### 4. 输出格式 + +必须按 JSON 格式输出: + +```json +{ + "worst_patterns": ["模式描述1", "模式描述2"], + "best_patterns": ["成功模式描述1"], + "tool_insights": { + "tool_name": {"success_rate": 0.95, "avg_duration_ms": 500, "recommendation": "建议"} + }, + "recommendations": [ + "具体的可操作建议1", + "具体的可操作建议2" + ] +} +``` + +### 5. 质量标准 + +- 每个建议都必须是具体的、可操作的 +- 包含实际的例子 +- 解释为什么这个问题值得修复 +- 提供可直接使用的规则或策略 +- 不要对假设性讨论产生误报 diff --git a/self_evolution/agents/evolution_planner.md b/self_evolution/agents/evolution_planner.md new file mode 100644 index 0000000000..7bdbee0941 --- /dev/null +++ b/self_evolution/agents/evolution_planner.md @@ -0,0 +1,51 @@ +--- +name: evolution_planner +description: > + 用于将反思报告转化为具体进化方案的规划 agent。 + 生成技能创建、策略调整、记忆更新等具体方案。 +model: inherit +tools: ["Read", "Grep"] +--- + +你是 Hermes Agent 的进化规划专家。你的任务是将性能分析结论转化为具体的、可执行的进化方案。 + +## 方案类型 + +### 技能创建 (skill) +当发现可复用的成功模式时,建议创建新技能: +- 描述技能的触发条件和执行步骤 +- 包含具体的 prompt 模板 +- 标注适用的场景 + +### 策略调整 (strategy) +当发现效率问题或错误模式时,建议创建策略规则: +- 定义匹配条件(工具名、平台、任务类型) +- 提供策略提示文本 +- 标注严重程度(hint | avoid | prefer) + +### 记忆更新 (memory) +当发现关于用户偏好或环境特性时,建议更新记忆: +- 写入 PERFORMANCE.md +- 内容简洁、可操作 +- 避免主观判断 + +### 工具偏好 (tool_preference) +当发现工具使用效率差异时,建议调整偏好: +- 基于数据说明为什么A优于B +- 提供具体的替换建议 + +## 输出格式 + +每个方案必须包含: +1. **标题**:简短描述(<50字) +2. **描述**:详细说明变更内容 +3. **预期影响**:定量或定性的改善预期 +4. **风险评估**:low / medium / high +5. 
**回滚方案**:如何安全地撤销此变更 + +## 质量标准 + +- 每个方案只变更一个变量 +- 方案必须是可测量、可回滚的 +- 优先高影响、低风险的方案 +- 每次最多提出 5 个方案 diff --git a/self_evolution/cron_jobs.py b/self_evolution/cron_jobs.py new file mode 100644 index 0000000000..13ba0c1b81 --- /dev/null +++ b/self_evolution/cron_jobs.py @@ -0,0 +1,115 @@ +""" +Self Evolution Plugin — Cron Job Registration +============================================== + +Registers two cron jobs: + 1. dream_time (1:00): Run dream consolidation + 2. propose_time (19:00): Push proposals via Feishu + +Uses Hermes' existing cron system (cron/jobs.json). +""" + +from __future__ import annotations + +import json +import logging +from pathlib import Path +from typing import Optional + +logger = logging.getLogger(__name__) + +from self_evolution.paths import CRON_DIR + +CRON_FILE = CRON_DIR / "jobs.json" + +DREAM_JOB_ID = "self_evolution_dream" +PROPOSE_JOB_ID = "self_evolution_propose" + + +def register_cron_jobs(): + """Register the two self_evolution cron jobs if not already present.""" + CRON_DIR.mkdir(parents=True, exist_ok=True) + + jobs = _load_jobs() + + # Resolve model config from hermes unified config + from self_evolution.reflection_engine import _resolve_runtime_config + runtime = _resolve_runtime_config() + model = runtime.get("model", "") + provider = runtime.get("provider", "") + + # Dream consolidation at 1:00 + if not any(j.get("id") == DREAM_JOB_ID for j in jobs): + jobs.append({ + "id": DREAM_JOB_ID, + "name": "Self Evolution - Dream Consolidation", + "prompt": "运行自我进化的梦境整理:分析前日session的错误和浪费时间问题,生成进化提案。", + "schedule": "0 1 * * *", + "model": model, + "provider": provider, + "deliver": "[SILENT]", + "skill": "self_evolution:dream", + }) + + # Proposal push at 19:00 + if not any(j.get("id") == PROPOSE_JOB_ID for j in jobs): + jobs.append({ + "id": PROPOSE_JOB_ID, + "name": "Self Evolution - Proposal Push", + "prompt": "推送今日自我进化提案到飞书。", + "schedule": "0 19 * * *", + "model": model, + "provider": provider, + "deliver": "[SILENT]", + "skill": 
"self_evolution:propose", + }) + + _save_jobs(jobs) + logger.info("Registered self_evolution cron jobs: dream=1:00, propose=19:00") + + +def run_dream_job(): + """Execute the dream consolidation job. + + Called by the cron system at 1:00. + Uses hermes unified runtime provider for model config. + """ + from self_evolution.reflection_engine import DreamEngine + + # DreamEngine() with no args auto-resolves via resolve_runtime_provider() + engine = DreamEngine() + report = engine.run(hours=24, max_runtime_seconds=6 * 3600) + + if report: + logger.info("Dream consolidation complete: score=%.3f, proposals generated", report.avg_score) + else: + logger.info("Dream consolidation: no data to analyze") + + +def run_propose_job(): + """Execute the proposal push job. + + Called by the cron system at 19:00. + """ + from self_evolution.feishu_notifier import FeishuNotifier + + notifier = FeishuNotifier() + notifier.send_daily_report() + + +def _load_jobs() -> list: + """Load existing cron jobs.""" + if not CRON_FILE.exists(): + return [] + try: + return json.loads(CRON_FILE.read_text(encoding="utf-8")) + except (json.JSONDecodeError, OSError): + return [] + + +def _save_jobs(jobs: list): + """Save cron jobs.""" + CRON_FILE.write_text( + json.dumps(jobs, ensure_ascii=False, indent=2), + encoding="utf-8", + ) diff --git a/self_evolution/db.py b/self_evolution/db.py new file mode 100644 index 0000000000..04cc1bc0cd --- /dev/null +++ b/self_evolution/db.py @@ -0,0 +1,296 @@ +""" +Self Evolution Plugin — Independent SQLite Database +===================================================== +Independent from state.db to avoid upstream schema conflicts. 
+""" + +from __future__ import annotations + +import json +import logging +import sqlite3 +import threading +import time +from pathlib import Path +from typing import Any, Dict, List, Optional + +logger = logging.getLogger(__name__) + +from self_evolution.paths import DATA_DIR as DB_DIR, DB_PATH + +SCHEMA_VERSION = 1 + +VALID_TABLES = frozenset({ + "tool_invocations", "session_scores", "outcome_signals", + "reflection_reports", "evolution_proposals", "improvement_units", + "strategy_versions", "_meta", +}) + + +def _validate_table(table: str) -> None: + """Reject table names not in the known schema.""" + if table not in VALID_TABLES: + raise ValueError(f"Invalid table name: {table!r}") + + +SCHEMA = """ +-- Tool invocation telemetry +CREATE TABLE IF NOT EXISTS tool_invocations ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + session_id TEXT NOT NULL, + tool_name TEXT NOT NULL, + duration_ms INTEGER, + success BOOLEAN NOT NULL, + error_type TEXT, + turn_number INTEGER, + created_at REAL NOT NULL DEFAULT (strftime('%s','now')) +); + +-- Session quality scores +CREATE TABLE IF NOT EXISTS session_scores ( + session_id TEXT PRIMARY KEY, + composite_score REAL, + completion_rate REAL, + efficiency_score REAL, + cost_efficiency REAL, + satisfaction_proxy REAL, + task_category TEXT, + model TEXT, + created_at REAL NOT NULL DEFAULT (strftime('%s','now')) +); + +-- Outcome signals +CREATE TABLE IF NOT EXISTS outcome_signals ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + session_id TEXT NOT NULL, + signal_type TEXT NOT NULL, + signal_value REAL, + metadata TEXT, + created_at REAL NOT NULL DEFAULT (strftime('%s','now')) +); + +-- Reflection reports +CREATE TABLE IF NOT EXISTS reflection_reports ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + period_start REAL, + period_end REAL, + sessions_analyzed INTEGER, + avg_score REAL, + error_summary TEXT DEFAULT '', + waste_summary TEXT DEFAULT '', + code_change_summary TEXT DEFAULT '', + worst_patterns TEXT DEFAULT '[]', + best_patterns TEXT 
DEFAULT '[]', + tool_insights TEXT DEFAULT '{}', + recommendations TEXT DEFAULT '[]', + model_used TEXT DEFAULT '', + created_at REAL NOT NULL DEFAULT (strftime('%s','now')) +); + +-- Evolution proposals +CREATE TABLE IF NOT EXISTS evolution_proposals ( + id TEXT PRIMARY KEY, + report_id INTEGER REFERENCES reflection_reports(id), + proposal_type TEXT NOT NULL, + title TEXT NOT NULL, + description TEXT NOT NULL, + expected_impact TEXT DEFAULT '', + risk_assessment TEXT DEFAULT 'low', + rollback_plan TEXT DEFAULT '', + status TEXT NOT NULL DEFAULT 'pending_approval', + user_feedback TEXT DEFAULT '', + created_at REAL NOT NULL DEFAULT (strftime('%s','now')), + resolved_at REAL +); + +-- Improvement unit tracking (A/B testing) +CREATE TABLE IF NOT EXISTS improvement_units ( + id TEXT PRIMARY KEY, + proposal_id TEXT REFERENCES evolution_proposals(id), + change_type TEXT NOT NULL, + version INTEGER DEFAULT 0, + baseline_score REAL DEFAULT 0.0, + current_score REAL DEFAULT 0.0, + sessions_sampled INTEGER DEFAULT 0, + min_sessions INTEGER DEFAULT 10, + min_improvement REAL DEFAULT 0.05, + max_regression REAL DEFAULT 0.10, + status TEXT NOT NULL DEFAULT 'active', + created_at REAL NOT NULL DEFAULT (strftime('%s','now')), + resolved_at REAL +); + +-- Strategy version history +CREATE TABLE IF NOT EXISTS strategy_versions ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + version INTEGER NOT NULL, + strategies_json TEXT NOT NULL, + avg_score REAL, + active_from REAL NOT NULL, + active_until REAL +); + +-- Schema version tracking +CREATE TABLE IF NOT EXISTS _meta ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL +); + +-- Indexes +CREATE INDEX IF NOT EXISTS idx_tool_invocations_session ON tool_invocations(session_id); +CREATE INDEX IF NOT EXISTS idx_tool_invocations_created ON tool_invocations(created_at); +CREATE INDEX IF NOT EXISTS idx_session_scores_created ON session_scores(created_at); +CREATE INDEX IF NOT EXISTS idx_outcome_signals_session ON outcome_signals(session_id); +CREATE 
INDEX IF NOT EXISTS idx_evolution_proposals_status ON evolution_proposals(status); +CREATE INDEX IF NOT EXISTS idx_improvement_units_status ON improvement_units(status); +""" + + +def _ensure_dir(): + DB_DIR.mkdir(parents=True, exist_ok=True) + + +_local = threading.local() + + +def get_connection() -> sqlite3.Connection: + """Return a thread-local cached connection (reused across calls).""" + conn = getattr(_local, "conn", None) + if conn is not None: + try: + conn.execute("SELECT 1") + return conn + except sqlite3.Error: + try: + conn.close() + except Exception: + pass + _ensure_dir() + conn = sqlite3.connect(str(DB_PATH)) + conn.row_factory = sqlite3.Row + conn.execute("PRAGMA journal_mode=WAL") + conn.execute("PRAGMA foreign_keys=ON") + _local.conn = conn + return conn + + +def close_connection(): + """Close the thread-local connection (for test cleanup / teardown).""" + conn = getattr(_local, "conn", None) + if conn is not None: + try: + conn.close() + except Exception: + pass + _local.conn = None + + +def init_db(): + """Initialize database with schema.""" + conn = get_connection() + conn.executescript(SCHEMA) + conn.execute( + "INSERT OR REPLACE INTO _meta (key, value) VALUES (?, ?)", + ("schema_version", str(SCHEMA_VERSION)), + ) + conn.commit() + logger.info("self_evolution database initialized at %s", DB_PATH) + + # Schema migration: add code_change_summary column if missing + try: + conn.execute("ALTER TABLE reflection_reports ADD COLUMN code_change_summary TEXT DEFAULT ''") + logger.info("Added code_change_summary column to reflection_reports") + except sqlite3.OperationalError: + pass # Column already exists + + # Close after init so subsequent calls get a fresh connection with the new schema + close_connection() + + +# ── Generic CRUD ───────────────────────────────────────────────────────── + +def insert(table: str, data: dict) -> int: + """Insert a row into a table. 
Returns the rowid.""" + _validate_table(table) + conn = get_connection() + cols = ", ".join(data.keys()) + placeholders = ", ".join("?" for _ in data) + sql = f"INSERT INTO {table} ({cols}) VALUES ({placeholders})" + cur = conn.execute(sql, list(data.values())) + conn.commit() + return cur.lastrowid + + +def insert_many(table: str, rows: List[dict]): + """Insert multiple rows.""" + _validate_table(table) + if not rows: + return + conn = get_connection() + cols = list(rows[0].keys()) + placeholders = ", ".join("?" for _ in cols) + sql = f"INSERT INTO {table} ({', '.join(cols)}) VALUES ({placeholders})" + conn.executemany(sql, [[row.get(c) for c in cols] for row in rows]) + conn.commit() + + +def update(table: str, data: dict, where: str, where_params: tuple = ()): + """Update rows matching where clause.""" + _validate_table(table) + conn = get_connection() + set_clause = ", ".join(f"{k} = ?" for k in data.keys()) + sql = f"UPDATE {table} SET {set_clause} WHERE {where}" + conn.execute(sql, list(data.values()) + list(where_params)) + conn.commit() + + +def fetch_one(table: str, where: str = "", params: tuple = ()) -> Optional[Dict[str, Any]]: + """Fetch a single row as dict.""" + _validate_table(table) + conn = get_connection() + sql = f"SELECT * FROM {table}" + if where: + sql += f" WHERE {where}" + sql += " LIMIT 1" + row = conn.execute(sql, params).fetchone() + return dict(row) if row else None + + +def fetch_all(table: str, where: str = "", params: tuple = (), + order_by: str = "", limit: int = 0) -> List[Dict[str, Any]]: + """Fetch all matching rows as list of dicts.""" + _validate_table(table) + conn = get_connection() + sql = f"SELECT * FROM {table}" + if where: + sql += f" WHERE {where}" + if order_by: + sql += f" ORDER BY {order_by}" + if limit: + sql += f" LIMIT {int(limit)}" + rows = conn.execute(sql, params).fetchall() + return [dict(r) for r in rows] + + +def query(sql: str, params: tuple = ()) -> List[Dict[str, Any]]: + """Run a raw query.""" + conn = 
get_connection() + rows = conn.execute(sql, params).fetchall() + return [dict(r) for r in rows] + + +def execute(sql: str, params: tuple = ()): + """Run a raw execute.""" + conn = get_connection() + conn.execute(sql, params) + conn.commit() + + +def cleanup(days: int = 30): + """Remove data older than N days.""" + cutoff = time.time() - (days * 86400) + conn = get_connection() + for table in ["tool_invocations", "outcome_signals"]: + conn.execute(f"DELETE FROM {table} WHERE created_at < ?", (cutoff,)) + conn.commit() + logger.info("Cleaned up data older than %d days", days) diff --git a/self_evolution/evolution_executor.py b/self_evolution/evolution_executor.py new file mode 100644 index 0000000000..123dd6a46f --- /dev/null +++ b/self_evolution/evolution_executor.py @@ -0,0 +1,325 @@ +""" +Self Evolution Plugin — Evolution Executor +============================================ + +Executes approved evolution proposals with rollback support. + +Design reference: Claude Code plugins/ralph-wiggum/ + - Self-referential feedback loop: execute → verify → rollback if needed + - Each change has a "completion promise" (verification criteria) + - Iteration > Perfection +""" + +from __future__ import annotations + +import json +import logging +import time +import uuid +from pathlib import Path +from typing import Optional + +from self_evolution import db +from self_evolution.models import Proposal, ImprovementUnit + +logger = logging.getLogger(__name__) + +from self_evolution.paths import DATA_DIR as STRATEGIES_DIR, STRATEGIES_FILE, ARCHIVE_DIR +from self_evolution.paths import SKILLS_DIR, MEMORIES_DIR + + +class EvolutionExecutor: + """Execute approved evolution proposals. 
+ + Supported proposal types: + - skill: create a new skill via skill_manager_tool + - strategy: update strategy rules + - memory: update PERFORMANCE.md via memory_tool + - tool_preference: update tool preference config + """ + + def execute(self, proposal: Proposal): + """Execute an approved proposal.""" + logger.info("Executing proposal: %s (%s)", proposal.id, proposal.proposal_type) + + try: + match proposal.proposal_type: + case "skill": + self._create_skill(proposal) + case "strategy": + self._update_strategy(proposal) + case "memory": + self._update_memory(proposal) + case "tool_preference": + self._update_tool_preference(proposal) + case "code_improvement": + self._save_optimization_request(proposal) + + # Mark as executed + db.update( + "evolution_proposals", + {"status": "executed", "resolved_at": time.time()}, + where="id = ?", + where_params=(proposal.id,), + ) + + # Create improvement tracking unit + self._create_tracking_unit(proposal) + + logger.info("Proposal %s executed successfully", proposal.id) + + except Exception as exc: + logger.exception("Failed to execute proposal %s: %s", proposal.id, exc) + db.update( + "evolution_proposals", + {"status": "execution_failed", "resolved_at": time.time()}, + where="id = ?", + where_params=(proposal.id,), + ) + + def check_and_rollback(self): + """Check active improvement units and rollback if needed. + + Called during dream consolidation to verify previous changes. 
+ """ + units = db.fetch_all("improvement_units", where="status = 'active'") + + for unit_data in units: + unit = ImprovementUnit( + id=unit_data["id"], + proposal_id=unit_data["proposal_id"], + change_type=unit_data["change_type"], + version=unit_data.get("version", 0), + baseline_score=unit_data.get("baseline_score", 0), + current_score=unit_data.get("current_score", 0), + sessions_sampled=unit_data.get("sessions_sampled", 0), + min_sessions=unit_data.get("min_sessions", 10), + min_improvement=unit_data.get("min_improvement", 0.05), + max_regression=unit_data.get("max_regression", 0.10), + ) + + # Update current score from recent sessions + self._update_unit_score(unit) + + if unit.should_revert: + self._revert(unit) + logger.warning("Rolled back improvement unit %s", unit.id) + elif unit.should_promote: + self._promote(unit) + logger.info("Promoted improvement unit %s", unit.id) + + # ── Proposal Type Handlers ──────────────────────────────────────────── + + def _create_skill(self, proposal: Proposal): + """Create a new skill via the skill_manager_tool.""" + from self_evolution.strategy_store import StrategyStore + + store = StrategyStore() + skill_dir = SKILLS_DIR / proposal.id + skill_dir.mkdir(parents=True, exist_ok=True) + + skill_content = ( + f"---\n" + f"name: {proposal.id}\n" + f"description: {proposal.title}\n" + f"---\n\n" + f"{proposal.description}\n" + ) + (skill_dir / "SKILL.md").write_text(skill_content, encoding="utf-8") + logger.info("Created learned skill: %s", skill_dir) + + def _update_strategy(self, proposal: Proposal): + """Update strategy rules file with version tracking.""" + from self_evolution.strategy_store import StrategyStore + + store = StrategyStore() + current = store.load() + + # Check for duplicate strategies by title similarity + rules = current.get("rules", []) + existing_titles = {r.get("name", "").strip().lower() for r in rules} + if proposal.title.strip().lower() in existing_titles: + logger.warning("Skipping duplicate 
strategy: %s", proposal.title) + return + + # Archive current version + version = current.get("version", 0) + 1 + store.archive(version - 1) + + # Parse new strategy from proposal description + new_strategy = { + "id": proposal.id, + "name": proposal.title, + "type": "learned", + "description": proposal.description, + "hint_text": proposal.description, + "conditions": [], + "severity": "medium", + "created_at": time.time(), + } + + # Add to strategies + rules.append(new_strategy) + current["rules"] = rules + current["version"] = version + + store.save(current) + logger.info("Updated strategies to version %d", version) + + # Invalidate injector cache so new strategy takes effect immediately + from self_evolution.strategy_injector import invalidate_cache + invalidate_cache() + + def _update_memory(self, proposal: Proposal): + """Update PERFORMANCE.md via the memory system.""" + perf_path = MEMORIES_DIR / "PERFORMANCE.md" + perf_path.parent.mkdir(parents=True, exist_ok=True) + + existing = "" + if perf_path.exists(): + existing = perf_path.read_text(encoding="utf-8") + + # Append new entry + timestamp = time.strftime("%Y-%m-%d %H:%M", time.localtime()) + entry = f"\n## [{timestamp}] 自动学习\n{proposal.description}\n" + + # Keep file under reasonable size (last 50 entries) + entries = (existing + entry).split("\n## ") + if len(entries) > 50: + entries = entries[-50:] + + perf_path.write_text("\n## ".join(entries), encoding="utf-8") + logger.info("Updated PERFORMANCE.md") + + def _update_tool_preference(self, proposal: Proposal): + """Update tool preference config.""" + prefs_path = STRATEGIES_DIR / "tool_preferences.json" + prefs = {} + if prefs_path.exists(): + prefs = json.loads(prefs_path.read_text(encoding="utf-8")) + + prefs[proposal.id] = { + "description": proposal.description, + "expected_impact": proposal.expected_impact, + "created_at": time.time(), + } + + prefs_path.write_text( + json.dumps(prefs, ensure_ascii=False, indent=2), + encoding="utf-8", + ) + 
logger.info("Updated tool preferences: %s", proposal.id) + + # ── Tracking & Verification ─────────────────────────────────────────── + + def _create_tracking_unit(self, proposal: Proposal): + """Create an improvement tracking unit after execution. + + Inspired by Ralph Wiggum's completion_promise pattern. + """ + # Get baseline score from recent sessions + recent = db.fetch_all( + "session_scores", + order_by="created_at DESC", + limit=10, + ) + baseline = ( + sum(s.get("composite_score", 0) for s in recent) / len(recent) + if recent else 0 + ) + + unit = ImprovementUnit( + id=f"unit-{uuid.uuid4().hex[:8]}", + proposal_id=proposal.id, + change_type=proposal.proposal_type, + baseline_score=baseline, + min_sessions=10, + min_improvement=0.05, + max_regression=0.10, + ) + + db.insert("improvement_units", unit.to_db_row()) + logger.info("Created tracking unit: %s (baseline=%.3f)", unit.id, baseline) + + def _update_unit_score(self, unit: ImprovementUnit): + """Update the current score for an improvement unit.""" + # Count sessions since this unit was created + unit_data = db.fetch_one("improvement_units", where="id = ?", params=(unit.id,)) + if not unit_data: + return + + created_at = unit_data.get("created_at", 0) + recent = db.fetch_all( + "session_scores", + where="created_at >= ?", + params=(created_at,), + order_by="created_at DESC", + ) + + if recent: + current_score = sum(s.get("composite_score", 0) for s in recent) / len(recent) + sessions_sampled = len(recent) + + db.update( + "improvement_units", + { + "current_score": current_score, + "sessions_sampled": sessions_sampled, + }, + where="id = ?", + where_params=(unit.id,), + ) + unit.current_score = current_score + unit.sessions_sampled = sessions_sampled + + def _revert(self, unit: ImprovementUnit): + """Revert a change by restoring the previous version.""" + from self_evolution.strategy_store import StrategyStore + + store = StrategyStore() + if unit.version > 0: + old = store.load_archive(unit.version - 1) 
+ if old: + store.save(old) + + db.update( + "improvement_units", + {"status": "reverted", "resolved_at": time.time()}, + where="id = ?", + where_params=(unit.id,), + ) + + def _promote(self, unit: ImprovementUnit): + """Promote an improvement unit from active to permanent.""" + db.update( + "improvement_units", + {"status": "promoted", "resolved_at": time.time()}, + where="id = ?", + where_params=(unit.id,), + ) + + # ── Code Improvement (save request document) ──────────────────────────── + + def _save_optimization_request(self, proposal: Proposal): + """Save a code improvement request as a document. + + Does NOT auto-modify code. The user reviews the request and decides + whether to implement changes manually or via Claude Code. + """ + # STRATEGIES_DIR is this module's imported alias of paths.DATA_DIR + # (bare DATA_DIR is not in scope here and would raise NameError) + req_dir = STRATEGIES_DIR / "optimization_requests" + req_dir.mkdir(parents=True, exist_ok=True) + doc_path = req_dir / f"{proposal.id}.md" + + doc_content = ( + f"# 程序优化需求\n\n" + f"**标题**: {proposal.title}\n" + f"**预期影响**: {proposal.expected_impact}\n" + f"**风险评估**: {proposal.risk_assessment}\n" + f"**回滚方案**: {proposal.rollback_plan}\n" + f"**创建时间**: {time.strftime('%Y-%m-%d %H:%M', time.localtime())}\n\n" + f"---\n\n" + f"{proposal.description}\n" + ) + + doc_path.write_text(doc_content, encoding="utf-8") + logger.info("Saved optimization request: %s", doc_path) diff --git a/self_evolution/evolution_proposer.py b/self_evolution/evolution_proposer.py new file mode 100644 index 0000000000..8854473fb1 --- /dev/null +++ b/self_evolution/evolution_proposer.py @@ -0,0 +1,229 @@ +""" +Self Evolution Plugin — Evolution Proposer +=========================================== + +Converts reflection insights into concrete, actionable evolution proposals.
+ +Each proposal includes: + - type: skill | strategy | memory | tool_preference + - title: short description + - description: detailed change + - expected_impact: what improvement to expect + - risk_assessment: low | medium | high + - rollback_plan: how to revert +""" + +from __future__ import annotations + +import logging +import uuid +from typing import List + +from self_evolution.models import Proposal, ReflectionReport + +logger = logging.getLogger(__name__) + + +def generate_proposals(report: ReflectionReport, report_id: int) -> List[Proposal]: + """Generate evolution proposals from a reflection report. + + Prioritizes proposals by: + 1. Impact (fixes for systemic errors > optimizations > enhancements) + 2. Risk (low risk first) + 3. Feasibility (clear rollback plan) + """ + proposals = [] + + # 1. Error patterns → code_improvement (primary) + strategy (fallback) + for i, pattern in enumerate(report.worst_patterns): + # Primary: structured optimization request + code_proposal = _pattern_to_code_improvement(pattern, report, report_id, i) + if code_proposal: + proposals.append(code_proposal) + + # 2. Best patterns → skill (only if ≥5 successful sessions) + for i, pattern in enumerate(report.best_patterns): + proposal = _success_to_proposal(pattern, report, report_id, i) + if proposal: + proposals.append(proposal) + + # 3. 
Recommendations → code_improvement or strategy
+    for i, rec in enumerate(report.recommendations):
+        proposal = _recommendation_to_proposal(rec, report, report_id, i)
+        if proposal:
+            proposals.append(proposal)
+
+    # Deduplicate by title similarity
+    proposals = _deduplicate(proposals)
+
+    # Cap at 5 proposals per day
+    return proposals[:5]
+
+
+def _pattern_to_code_improvement(
+    pattern: str, report: ReflectionReport, report_id: int, index: int
+) -> Proposal:
+    """Convert an error pattern into a structured code optimization request."""
+    # Extract key info from error analysis
+    error_detail = report.error_summary or ""
+    sessions = report.sessions_analyzed or 0
+    score = report.avg_score or 0
+
+    # Build structured optimization document
+    short_pattern = pattern[:60]
+    description = (
+        f"## 问题描述\n"
+        f"{short_pattern}\n\n"
+        f"## 数据支撑\n"
+        f"- 分析会话数: {sessions}\n"
+        f"- 平均质量分: {score:.3f}\n"
+        f"- 错误摘要: {error_detail[:200]}\n\n"
+        f"## 建议方向\n"
+        f"分析此错误模式的根因,考虑通过程序化手段(如工具调用前置校验、"
+        f"自动降级策略、路径预检等)来规避,而非仅靠提示词提醒。\n\n"
+        f"## 备注\n"
+        f"此为程序优化需求,审批后将保存为需求文档,需手动实施代码修改。"
+    )
+
+    return Proposal(
+        id=f"prop-opt-{uuid.uuid4().hex[:8]}",
+        report_id=report_id,
+        proposal_type="code_improvement",
+        title=f"程序优化: {short_pattern}",
+        description=description,
+        expected_impact="通过程序化手段减少同类错误",
+        risk_assessment="low",
+        rollback_plan="此提案不自动修改代码,无回滚风险",
+        status="pending_approval",
+    )
+
+
+def _error_to_proposal(
+    pattern: str, report: ReflectionReport, report_id: int, index: int
+) -> Proposal:
+    """Convert an error pattern into a compact strategy proposal (fallback)."""
+    # Generate a short hint_text (≤30 chars) and embed it in the proposal
+    hint = _compress_hint(pattern)
+    return Proposal(
+        id=f"prop-error-{uuid.uuid4().hex[:8]}",
+        report_id=report_id,
+        proposal_type="strategy",
+        title=f"规避模式: {pattern[:50]}",
+        description=f"基于错误分析发现的问题模式: {pattern}\n\n"
+                    f"建议创建策略规则来规避此类问题。\n\n"
+                    f"策略提示词: {hint}",
+        expected_impact="减少同类错误发生率",
+        risk_assessment="low",
+        rollback_plan="删除策略规则即可恢复",
+        status="pending_approval",
) + + +def _success_to_proposal( + pattern: str, report: ReflectionReport, report_id: int, index: int +) -> Proposal: + """Convert a success pattern into a proposal (skill creation). + + Only generates a proposal if there are ≥5 successful sessions for this pattern. + """ + success_count = _count_successful_sessions(pattern, report) + if success_count < 5: + logger.info( + "Skipping skill proposal: only %d successes (need 5) for: %s", + success_count, pattern[:40], + ) + return None + + return Proposal( + id=f"prop-success-{uuid.uuid4().hex[:8]}", + report_id=report_id, + proposal_type="skill", + title=f"固化成功模式: {pattern[:50]}", + description=f"基于成功分析发现的高效模式: {pattern}\n\n" + f"已验证 {success_count} 次成功执行。\n\n" + f"建议创建可复用的技能来固化此模式。", + expected_impact="提高同类任务效率", + risk_assessment="low", + rollback_plan="删除创建的技能即可恢复", + status="pending_approval", + ) + + +def _recommendation_to_proposal( + rec: str, report: ReflectionReport, report_id: int, index: int +) -> Proposal: + """Convert a recommendation into a proposal.""" + # Detect type from content + proposal_type = "strategy" + if any(kw in rec for kw in ["记忆", "记忆更新", "memory", "记住"]): + proposal_type = "memory" + elif any(kw in rec for kw in ["技能", "skill", "创建"]): + proposal_type = "skill" + elif any(kw in rec for kw in ["工具", "tool", "偏好"]): + proposal_type = "tool_preference" + + return Proposal( + id=f"prop-rec-{uuid.uuid4().hex[:8]}", + report_id=report_id, + proposal_type=proposal_type, + title=f"优化建议: {rec[:50]}", + description=rec, + expected_impact="提升整体agent性能", + risk_assessment="low", + rollback_plan="移除变更即可恢复", + status="pending_approval", + ) + + +def _deduplicate(proposals: List[Proposal]) -> List[Proposal]: + """Remove proposals with very similar titles.""" + seen_titles = set() + unique = [] + for p in proposals: + # Normalize title for comparison + normalized = p.title.lower().strip()[:30] + if normalized not in seen_titles: + seen_titles.add(normalized) + unique.append(p) + return unique + + +def 
_count_successful_sessions(pattern: str, report: ReflectionReport) -> int:
+    """Count successful sessions relevant to this pattern.
+
+    Approximation: counts recent sessions in session_scores with
+    composite_score >= 0.7. Keyword matching between the pattern and
+    task_category is not implemented yet, so all recent high-scoring
+    sessions are counted.
+    """
+    try:
+        from self_evolution import db
+
+        scores = db.fetch_all(
+            "session_scores",
+            where="composite_score >= ?",
+            params=(0.7,),
+            order_by="created_at DESC",
+            limit=100,
+        )
+        return len(scores)
+    except Exception:
+        # Fallback: use sessions_analyzed from report as estimate
+        return report.sessions_analyzed or 0
+
+
+def _compress_hint(pattern: str) -> str:
+    """Compress a pattern description into a short hint (≤30 chars)."""
+    # Keyword-based compression
+    mappings = [
+        (["bash", "路径", "path", "预检"], "bash前先read验证路径"),
+        (["api", "调试", "降级"], "API失败时降级只读探查"),
+        (["browser", "超时", "timeout"], "浏览器操作设超时保护"),
+        (["重试", "retry", "重复"], "避免重复重试相同操作"),
+        (["工具", "tool", "失败"], "工具失败时切换备选方案"),
+    ]
+    text = pattern.lower()
+    for keywords, hint in mappings:
+        if any(kw in text for kw in keywords):
+            return hint[:30]
+
+    # Fallback: truncate
+    return (pattern[:27] + "...") if len(pattern) > 30 else pattern
diff --git a/self_evolution/feishu_notifier.py b/self_evolution/feishu_notifier.py
new file mode 100644
index 0000000000..2c56d5de1c
--- /dev/null
+++ b/self_evolution/feishu_notifier.py
@@ -0,0 +1,490 @@
+"""
+Self Evolution Plugin — Feishu Notifier
+========================================
+
+Pushes evolution proposals to Feishu at 19:00 daily.
+Uses interactive card messages with action buttons for approval.
+
+Receives callbacks when user clicks: approve / modify / reject.
+""" + +from __future__ import annotations + +import json +import logging +import os +import time +from typing import Any, Dict, List, Optional + +from self_evolution import db +from self_evolution.models import Proposal + +logger = logging.getLogger(__name__) + + +class FeishuNotifier: + """Send evolution proposals via Feishu interactive cards.""" + + def __init__(self): + self.app_id = os.getenv("FEISHU_APP_ID", "") + self.app_secret = os.getenv("FEISHU_APP_SECRET", "") + self.enabled = bool(self.app_id and self.app_secret) + self._client = None + self._token_cache: Optional[tuple[str, float]] = None # (token, expire_at) + + def send_daily_report(self): + """Send pending proposals as a daily Feishu card message. + + Called by the 19:00 cron job. + """ + if not self.enabled: + logger.info("Feishu not configured, skipping notification") + return + + # Load pending proposals + proposals = db.fetch_all( + "evolution_proposals", + where="status = ?", + params=("pending_approval",), + order_by="created_at DESC", + ) + + if not proposals: + logger.info("No pending proposals to send") + return + + # Load latest reflection report for context + reports = db.fetch_all( + "reflection_reports", + order_by="created_at DESC", + limit=1, + ) + report = reports[0] if reports else {} + + # Build card + card = self._build_card(proposals, report) + + # Send + self._send_card(card) + logger.info("Sent %d proposals via Feishu", len(proposals)) + + def handle_callback(self, action: str, proposal_id: str, user_input: str = ""): + """Handle Feishu card button callback. + + Args: + action: "approve" | "modify" | "reject" + proposal_id: The proposal ID + user_input: Optional user modification text + + Returns: + dict with 'feedback' (str) and 'updated_card' (dict or None). 
+ """ + result = {"feedback": "", "updated_card": None} + + if action == "approve": + logger.info("[TRACE] handle_callback: approving proposal %s", proposal_id) + title = self._approve(proposal_id) + result["feedback"] = f"✅ 已通过并执行: {title}" + logger.info("[TRACE] handle_callback: approved '%s'", title) + elif action == "modify": + title = self._modify(proposal_id, user_input) + result["feedback"] = f"✏️ 已修改: {title}" + elif action == "reject": + title = self._reject(proposal_id, user_input) + result["feedback"] = f"❌ 已拒绝: {title}" + + # Build updated card with remaining pending proposals + logger.info("[TRACE] handle_callback: building updated card") + result["updated_card"] = self.build_updated_card() + logger.info("[TRACE] handle_callback: updated_card=%s", "present" if result["updated_card"] else "None (all done)") + return result + + def build_updated_card(self) -> Optional[dict]: + """Build a card with remaining pending proposals. + + Returns None if no pending proposals remain (caller can show + a 'all done' card instead). 
+ """ + pending = db.fetch_all( + "evolution_proposals", + where="status = ?", + params=("pending_approval",), + order_by="created_at DESC", + ) + + if not pending: + return None + + # Load latest report for context + reports = db.fetch_all("reflection_reports", order_by="created_at DESC", limit=1) + report = reports[0] if reports else {} + + date_str = time.strftime("%Y-%m-%d", time.localtime()) + elements = [] + + # Status bar + elements.append({ + "tag": "div", + "text": {"tag": "lark_md", "content": f"**待审批**: {len(pending)} 个提案"}, + }) + elements.append({"tag": "hr"}) + + # Proposals + for i, p in enumerate(pending): + type_emoji = {"skill": "🛠️", "strategy": "⚡", "memory": "🧠", "tool_preference": "🔧", "code_improvement": "🏗️"} + emoji = type_emoji.get(p.get("proposal_type", ""), "📋") + + proposal_text = ( + f"**[{emoji}] {p.get('title', f'提案 {i+1}')}**\n" + f"{p.get('description', '')[:200]}\n" + f"预期影响: {p.get('expected_impact', 'N/A')} | " + f"风险: {p.get('risk_assessment', 'low')}\n" + ) + elements.append({ + "tag": "div", + "text": {"tag": "lark_md", "content": proposal_text}, + }) + + # Action buttons + elements.append({ + "tag": "action", + "actions": [ + { + "tag": "button", + "text": {"tag": "plain_text", "content": "通过"}, + "type": "primary", + "value": {"action": "approve", "proposal_id": p["id"]}, + }, + { + "tag": "button", + "text": {"tag": "plain_text", "content": "修改"}, + "type": "default", + "value": {"action": "modify", "proposal_id": p["id"]}, + }, + { + "tag": "button", + "text": {"tag": "plain_text", "content": "拒绝"}, + "type": "danger", + "value": {"action": "reject", "proposal_id": p["id"]}, + }, + ], + }) + + return { + "header": { + "title": {"tag": "plain_text", "content": f"Hermes 进化报告 ({date_str})"}, + "template": "blue", + }, + "elements": elements, + } + + def send_rollback_notification(self, unit_id: str, reason: str): + """Notify user that an improvement unit was auto-rolled back.""" + if not self.enabled: + return + card = { + 
"elements": [ + { + "tag": "div", + "text": { + "tag": "lark_md", + "content": f"**自动回滚通知**\n\n" + f"改进单元 `{unit_id}` 已自动回滚。\n" + f"原因: {reason}", + }, + }, + ], + } + self._send_card(card) + + # ── Internal Methods ────────────────────────────────────────────────── + + def _approve(self, proposal_id: str) -> str: + """Mark proposal as approved and trigger execution. Returns title.""" + row = db.fetch_one("evolution_proposals", where="id = ?", params=(proposal_id,)) + title = row.get("title", proposal_id) if row else proposal_id + + db.update( + "evolution_proposals", + {"status": "approved", "resolved_at": time.time()}, + where="id = ?", + where_params=(proposal_id,), + ) + + # Trigger execution + if row: + from self_evolution.evolution_executor import EvolutionExecutor + executor = EvolutionExecutor() + proposal = Proposal( + id=row["id"], + proposal_type=row["proposal_type"], + title=row["title"], + description=row["description"], + expected_impact=row.get("expected_impact", ""), + risk_assessment=row.get("risk_assessment", "low"), + rollback_plan=row.get("rollback_plan", ""), + status="approved", + ) + executor.execute(proposal) + + return title + + def _modify(self, proposal_id: str, user_input: str) -> str: + """Update proposal with user's modification. Returns title.""" + row = db.fetch_one("evolution_proposals", where="id = ?", params=(proposal_id,)) + title = row.get("title", proposal_id) if row else proposal_id + + db.update( + "evolution_proposals", + {"user_feedback": user_input, "status": "pending_approval"}, + where="id = ?", + where_params=(proposal_id,), + ) + return title + + def _reject(self, proposal_id: str, user_input: str) -> str: + """Mark proposal as rejected and record reason for learning. 
Returns title.""" + row = db.fetch_one("evolution_proposals", where="id = ?", params=(proposal_id,)) + title = row.get("title", proposal_id) if row else proposal_id + + db.update( + "evolution_proposals", + {"status": "rejected", "user_feedback": user_input, "resolved_at": time.time()}, + where="id = ?", + where_params=(proposal_id,), + ) + # Record rejection for the dream engine to learn from + db.insert("outcome_signals", { + "session_id": f"evolution_rejection_{proposal_id}", + "signal_type": "proposal_rejected", + "signal_value": 0.0, + "metadata": json.dumps({"proposal_id": proposal_id, "reason": user_input}, ensure_ascii=False), + }) + return title + + def _build_card(self, proposals: List[dict], report: dict) -> dict: + """Build Feishu interactive card JSON.""" + # Header + date_str = time.strftime("%Y-%m-%d", time.localtime()) + elements = [] + + # Overview section + sessions_analyzed = report.get("sessions_analyzed", 0) + avg_score = report.get("avg_score", 0) + overview = ( + f"**日期**: {date_str}\n" + f"**分析Sessions**: {sessions_analyzed}\n" + f"**平均评分**: {avg_score:.3f}\n" + ) + elements.append({ + "tag": "div", + "text": {"tag": "lark_md", "content": overview}, + }) + + # Error summary + error_summary = report.get("error_summary", "") + if error_summary: + elements.append({ + "tag": "div", + "text": {"tag": "lark_md", "content": f"**错误分析**\n{error_summary}"}, + }) + + # Waste summary + waste_summary = report.get("waste_summary", "") + if waste_summary: + elements.append({ + "tag": "div", + "text": {"tag": "lark_md", "content": f"**时间浪费分析**\n{waste_summary}"}, + }) + + # Code change summary + code_change_summary = report.get("code_change_summary", "") + if code_change_summary: + elements.append({ + "tag": "div", + "text": {"tag": "lark_md", "content": f"**系统代码更新**\n{code_change_summary}"}, + }) + + # Separator + elements.append({"tag": "hr"}) + + # Proposals + for i, p in enumerate(proposals): + type_emoji = {"skill": "🛠️", "strategy": "⚡", "memory": 
"🧠", "tool_preference": "🔧", "code_improvement": "🏗️"} + emoji = type_emoji.get(p.get("proposal_type", ""), "📋") + + proposal_text = ( + f"**[{emoji}] {p.get('title', f'提案 {i+1}')}**\n" + f"{p.get('description', '')[:200]}\n" + f"预期影响: {p.get('expected_impact', 'N/A')} | " + f"风险: {p.get('risk_assessment', 'low')}\n" + ) + elements.append({ + "tag": "div", + "text": {"tag": "lark_md", "content": proposal_text}, + }) + + # Action buttons + elements.append({ + "tag": "action", + "actions": [ + { + "tag": "button", + "text": {"tag": "plain_text", "content": "通过"}, + "type": "primary", + "value": {"action": "approve", "proposal_id": p["id"]}, + }, + { + "tag": "button", + "text": {"tag": "plain_text", "content": "修改"}, + "type": "default", + "value": {"action": "modify", "proposal_id": p["id"]}, + }, + { + "tag": "button", + "text": {"tag": "plain_text", "content": "拒绝"}, + "type": "danger", + "value": {"action": "reject", "proposal_id": p["id"]}, + }, + ], + }) + + return { + "header": { + "title": {"tag": "plain_text", "content": f"Hermes 每日进化报告 ({date_str})"}, + "template": "blue", + }, + "elements": elements, + } + + def _get_client(self): + """Get or create a cached lark Client instance.""" + if self._client is None: + import lark_oapi as lark + self._client = ( + lark.Client.builder() + .app_id(self.app_id) + .app_secret(self.app_secret) + .build() + ) + return self._client + + def _send_card(self, card: dict): + """Send an interactive card via Feishu. + + Prefers lark_oapi SDK (same as the gateway), falls back to REST. 
+ """ + try: + receive_id, receive_id_type = self._resolve_target() + if not receive_id: + logger.warning("No Feishu receive target configured") + return + + content_str = json.dumps(card, ensure_ascii=False) + + # Try SDK first (using cached client) + try: + from lark_oapi.api.im.v1 import CreateMessageRequest, CreateMessageRequestBody + + client = self._get_client() + + body = CreateMessageRequestBody.builder() \ + .receive_id(receive_id) \ + .msg_type("interactive") \ + .content(content_str) \ + .build() + + request = CreateMessageRequest.builder() \ + .receive_id_type(receive_id_type) \ + .request_body(body) \ + .build() + + response = client.im.v1.message.create(request) + if response.success(): + logger.info("Feishu card sent via SDK") + return + logger.warning("Feishu SDK send failed: code=%s msg=%s", response.code, response.msg) + except ImportError: + pass + + # Fallback to REST API + self._send_card_rest(receive_id, receive_id_type, content_str) + + except Exception as exc: + logger.warning("Feishu notification failed: %s", exc) + + def _resolve_target(self) -> tuple: + """Resolve (receive_id, receive_id_type) from env config.""" + deliver_to = os.getenv("SELF_EVOLUTION_FEISHU_DELIVER", "user") + if deliver_to.startswith("chat:"): + return deliver_to.replace("chat:", ""), "chat_id" + user_id = os.getenv("SELF_EVOLUTION_FEISHU_USER_ID", "") + if not user_id: + return "", "" + if user_id.startswith("ou_"): + return user_id, "open_id" + if user_id.startswith("oc_"): + return user_id, "chat_id" + return user_id, "user_id" + + def _send_card_rest(self, receive_id: str, receive_id_type: str, content: str): + """Fallback: send card via REST API.""" + import requests + + token = self._get_tenant_token() + if not token: + logger.warning("Failed to get Feishu token") + return + + resp = requests.post( + "https://open.feishu.cn/open-apis/im/v1/messages", + headers={"Authorization": f"Bearer {token}"}, + params={"receive_id_type": receive_id_type}, + 
json={"receive_id": receive_id, "msg_type": "interactive", "content": content}, + timeout=30, + ) + if resp.status_code != 200: + logger.warning("Feishu REST send failed: %s", resp.text) + + def _send_confirmation(self, proposal_id: str, message: str): + """Send a simple confirmation message.""" + if not self.enabled: + return + card = { + "elements": [ + { + "tag": "div", + "text": { + "tag": "lark_md", + "content": f"**提案 `{proposal_id}`**: {message}", + }, + }, + ], + } + self._send_card(card) + + def _get_tenant_token(self) -> Optional[str]: + """Get Feishu tenant access token with caching (1.5h TTL).""" + if self._token_cache is not None: + token, expire_at = self._token_cache + if time.time() < expire_at: + return token + try: + import requests + resp = requests.post( + "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal", + json={ + "app_id": self.app_id, + "app_secret": self.app_secret, + }, + timeout=10, + ) + if resp.status_code == 200: + token = resp.json().get("tenant_access_token") + if token: + # Feishu tokens expire in ~2h; cache for 1.5h + self._token_cache = (token, time.time() + 5400) + return token + except Exception as exc: + logger.debug("Failed to get Feishu token: %s", exc) + return None diff --git a/self_evolution/git_analyzer.py b/self_evolution/git_analyzer.py new file mode 100644 index 0000000000..5afded262e --- /dev/null +++ b/self_evolution/git_analyzer.py @@ -0,0 +1,170 @@ +""" +Self Evolution Plugin — Git Analysis +===================================== + +Analyzes git commit history for the dream consolidation engine. + +Uses a single batched ``git log --stat --name-only`` call instead of +25+ individual subprocess invocations. + +Extracted from reflection_engine.py for single-responsibility. 
+""" + +from __future__ import annotations + +import logging +import re +import subprocess +import time +from pathlib import Path +from typing import Dict + +from self_evolution.models import CodeChangeAnalysis, CommitInfo + +logger = logging.getLogger(__name__) + + +def analyze_code_changes(hours: int = 24) -> CodeChangeAnalysis: + """Analyze git commits from the previous period. + + Uses a single batched git log call with --stat --name-only + instead of 25+ individual subprocess calls. + """ + project_root = str(Path(__file__).resolve().parent.parent) + + cutoff_epoch = time.time() - (hours * 3600) + cutoff_date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(cutoff_epoch)) + + try: + # Single batched call: format + shortstat + name-only + result = subprocess.run( + ["git", "log", + "--format=COMMITSTART%h%n%s%n%an%n%at%n%b%nENDHEADER", + "--shortstat", "--name-only", + "--no-merges", f"--since={cutoff_date}", "-15"], + capture_output=True, text=True, timeout=30, + cwd=project_root, + ) + if result.returncode != 0 or not result.stdout.strip(): + return CodeChangeAnalysis() + + commits = _parse_batched_output(result.stdout) + if not commits: + return CodeChangeAnalysis() + + # Aggregate stats + total_ins = sum(c.insertions for c in commits) + total_del = sum(c.deletions for c in commits) + total_files = sum(c.files_changed for c in commits) + authors = list(dict.fromkeys(c.author for c in commits)) + + # Categorize by conventional commit prefix + categories: Dict[str, int] = {} + for c in commits: + cat = _categorize_commit(c.subject) + categories[cat] = categories.get(cat, 0) + 1 + + # Extract top-level module areas + all_files = [] + for c in commits: + all_files.extend(c.file_list) + areas = list(dict.fromkeys( + f.split("/")[0] for f in all_files + if "/" in f and not f.startswith(".") + ))[:10] + + return CodeChangeAnalysis( + commits=commits, + total_commits=len(commits), + total_insertions=total_ins, + total_deletions=total_del, + 
total_files_changed=total_files, + authors=authors, + change_categories=categories, + areas_touched=areas, + ) + + except (subprocess.SubprocessError, FileNotFoundError, OSError): + logger.debug("git analysis unavailable", exc_info=True) + return CodeChangeAnalysis() + + +def _parse_batched_output(stdout: str) -> list: + """Parse the batched git log output into CommitInfo objects.""" + commits = [] + raw_commits = stdout.split("COMMITSTART") + for raw in raw_commits: + raw = raw.strip() + if not raw: + continue + + header_end = raw.find("ENDHEADER") + if header_end < 0: + continue + header = raw[:header_end].strip() + lines = header.split("\n") + if len(lines) < 4: + continue + + hash_short = lines[0].strip() + subject = lines[1].strip() + author = lines[2].strip() + try: + timestamp = float(lines[3].strip()) + except ValueError: + continue + body = "\n".join(lines[4:]).strip()[:500] + + # After ENDHEADER: shortstat line(s) + file list + rest = raw[header_end + len("ENDHEADER"):].strip() + + files_changed = 0 + insertions = 0 + deletions = 0 + file_list = [] + stat_done = False + for rline in rest.split("\n"): + rline = rline.strip() + if not rline: + continue + if not stat_done and ("files changed" in rline or "file changed" in rline + or "insertion" in rline or "deletion" in rline): + files_changed = _parse_int(r'(\d+) files? changed', rline) + insertions = _parse_int(r'(\d+) insertion', rline) + deletions = _parse_int(r'(\d+) deletion', rline) + stat_done = True + continue + if "/" in rline or "." 
in rline:
+                file_list.append(rline)
+
+        commits.append(CommitInfo(
+            hash_short=hash_short,
+            subject=subject,
+            body=body,
+            author=author,
+            timestamp=timestamp,
+            files_changed=files_changed,
+            insertions=insertions,
+            deletions=deletions,
+            file_list=file_list[:20],
+        ))
+
+    return commits
+
+
+# ── Helpers ───────────────────────────────────────────────────────────────
+
+
+def _parse_int(pattern: str, text: str) -> int:
+    """Extract first integer matching regex pattern from text."""
+    m = re.search(pattern, text)
+    return int(m.group(1)) if m else 0
+
+
+def _categorize_commit(subject: str) -> str:
+    """Categorize commit by conventional commit prefix."""
+    s = subject.lower()
+    for prefix in ("feat", "fix", "refactor", "test", "docs", "chore", "perf", "style", "ci", "build"):
+        if s.startswith(prefix):
+            return prefix
+    return "other"
diff --git a/self_evolution/hooks.py b/self_evolution/hooks.py
new file mode 100644
index 0000000000..0cdb1e25a3
--- /dev/null
+++ b/self_evolution/hooks.py
@@ -0,0 +1,200 @@
+"""
+Self Evolution Plugin — Lifecycle Hooks
+========================================
+
+Registered hooks:
+
+    - post_tool_call: Collect per-tool telemetry
+    - on_session_end: Compute quality score + detect outcome signals
+    - pre_llm_call: Inject learned strategy hints
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+import time
+from typing import Any, Dict, Optional
+
+logger = logging.getLogger(__name__)
+
+# ── Correction detection patterns (inspired by Claude Code conversation-analyzer) ──
+
+# Bare English words are anchored with \b so substring matches
+# ("know" containing "no", "retrying" containing "retry" is fine) don't
+# produce false positives; Chinese terms need no boundaries.
+CORRECTION_PATTERNS = re.compile(
+    r"(不对|错误|重试|不要|停|\bstop\b|\bwrong\b|\bretry\b|\bno\b|don't|not that|不是|为什么|换一种)",
+    re.IGNORECASE,
+)
+
+FRUSTRATION_PATTERNS = re.compile(
+    r"(烦|慢|太慢|浪费时间|why did you|无语|算了|够了)",
+    re.IGNORECASE,
+)
+
+
+# ── post_tool_call ───────────────────────────────────────────────────────
+
+def on_tool_call(**kwargs) -> None:
+    """Collect per-tool invocation telemetry."""
+    from self_evolution.db import insert
+ + tool_name = kwargs.get("tool_name", "unknown") + started_at = kwargs.get("started_at", time.time()) + duration_ms = kwargs.get("duration_ms", 0) + success = kwargs.get("success", True) + error_type = kwargs.get("error_type") if not success else None + session_id = kwargs.get("session_id", "") + turn_number = kwargs.get("turn_number", 0) + + try: + insert("tool_invocations", { + "session_id": session_id, + "tool_name": tool_name, + "duration_ms": duration_ms, + "success": success, + "error_type": error_type, + "turn_number": turn_number, + "created_at": started_at, + }) + except Exception as exc: + logger.warning("telemetry insert failed: %s", exc) + + +# ── on_session_end ─────────────────────────────────────────────────────── + +def on_session_end(**kwargs) -> None: + """Compute quality score and detect outcome signals when session ends.""" + from self_evolution.db import insert, insert_many + from self_evolution.quality_scorer import compute_score + + session_data = kwargs.get("session_data", {}) + session_id = session_data.get("session_id", "") + + if not session_id: + return + + # Compute quality score + score = compute_score(session_data) + try: + insert("session_scores", score.to_db_row()) + except Exception as exc: + logger.warning("score insert failed: %s", exc) + + # Detect and batch-insert outcome signals + signals = _detect_outcome_signals(session_data, kwargs) + if signals: + try: + insert_many("outcome_signals", signals) + except Exception as exc: + logger.warning("signal insert failed: %s", exc) + + +def _detect_outcome_signals(session_data: dict, kwargs: dict) -> list: + """Detect implicit outcome signals from session behavior. 
+
+    Inspired by Claude Code conversation-analyzer's signal detection:
+    - Explicit corrections: user says "不对", "重试"
+    - Frustration signals: user says "为什么", "太慢"
+    - Completion / interruption status
+    - Budget exhaustion
+    """
+    import json  # local import: hooks.py does not import json at module level
+
+    signals = []
+    session_id = session_data.get("session_id", "")
+
+    # Completion signal
+    completed = session_data.get("completed", False)
+    interrupted = session_data.get("interrupted", False)
+    partial = session_data.get("partial", False)
+
+    if completed:
+        signals.append({
+            "session_id": session_id,
+            "signal_type": "completed",
+            "signal_value": 1.0,
+            "metadata": "{}",
+        })
+    elif interrupted:
+        signals.append({
+            "session_id": session_id,
+            "signal_type": "interrupted",
+            "signal_value": 0.5,
+            "metadata": "{}",
+        })
+    elif partial:
+        signals.append({
+            "session_id": session_id,
+            "signal_type": "partial",
+            "signal_value": 0.3,
+            "metadata": "{}",
+        })
+
+    # Budget exhaustion
+    max_iterations = session_data.get("max_iterations", 0)
+    iterations = session_data.get("iterations", 0)
+    if max_iterations and iterations >= max_iterations:
+        signals.append({
+            "session_id": session_id,
+            "signal_type": "budget_exhausted",
+            "signal_value": 0.0,
+            "metadata": json.dumps({"iterations": iterations}),
+        })
+
+    # User correction / frustration detection from messages
+    messages = session_data.get("messages", [])
+    for msg in messages:
+        if msg.get("role") != "user":
+            continue
+        content = msg.get("content", "")
+        if isinstance(content, list):
+            content = " ".join(
+                block.get("text", "") for block in content
+                if isinstance(block, dict) and block.get("type") == "text"
+            )
+
+        if CORRECTION_PATTERNS.search(content):
+            signals.append({
+                "session_id": session_id,
+                "signal_type": "correction",
+                "signal_value": 0.2,
+                # json.dumps, not repr(): repr() emits Python quoting, which
+                # is not valid JSON for downstream metadata parsing
+                "metadata": json.dumps({"text": content[:100]}, ensure_ascii=False),
+            })
+            break  # Only one correction signal per session
+
+        if FRUSTRATION_PATTERNS.search(content):
+            signals.append({
+                "session_id": session_id,
+                "signal_type": "frustration",
+                "signal_value": 0.1,
+                "metadata": json.dumps({"text": content[:100]}, ensure_ascii=False),
+            })
+            break
+
+    return signals
+
+
+# ── pre_llm_call ─────────────────────────────────────────────────────────
+
+def on_pre_llm_call(**kwargs) -> Optional[Dict[str, Any]]:
+    """Inject learned strategy hints into system prompt.
+
+    Inspired by Claude Code learning-output-style SessionStart hook pattern:
+    automatically inject behavioral context without user action.
+    """
+    from self_evolution.strategy_injector import inject_hints
+
+    try:
+        hints = inject_hints(kwargs)
+        if hints:
+            return {"system_hint": hints}
+    except Exception as exc:
+        logger.warning("strategy injection failed: %s", exc)
+
+    return None
+
+
+# ── Registration ─────────────────────────────────────────────────────────
+
+def register_all(ctx) -> None:
+    """Register all lifecycle hooks via PluginContext."""
+    ctx.register_hook("post_tool_call", on_tool_call)
+    ctx.register_hook("on_session_end", on_session_end)
+    ctx.register_hook("pre_llm_call", on_pre_llm_call)
diff --git a/self_evolution/model_config.py b/self_evolution/model_config.py
new file mode 100644
index 0000000000..69c0082ccb
--- /dev/null
+++ b/self_evolution/model_config.py
@@ -0,0 +1,248 @@
+"""
+Self Evolution Plugin — Model Configuration & Failover
+======================================================
+
+Handles runtime model resolution (primary / fallback / multimodal)
+and thread-safe failover state management.
+
+Extracted from reflection_engine.py for single-responsibility.
+"""
+
+from __future__ import annotations
+
+import logging
+import threading
+import time
+from typing import Any, Dict, Optional
+
+logger = logging.getLogger(__name__)
+
+
+# ── Model Configuration Resolution ────────────────────────────────────────
+
+
+def resolve_config() -> dict:
+    """Resolve model config via hermes unified runtime provider.
+ + Returns dict with: + base_url, api_key, model, provider — primary text model + fallback: {base_url, api_key, model, provider} — fallback text model + multimodal: {base_url, api_key, model, provider} — vision model + Returns empty dict if no provider is available. + """ + try: + from hermes_cli.runtime_provider import resolve_runtime_provider + from hermes_cli.config import load_config + + runtime = resolve_runtime_provider() + config = load_config() + model_name = config.get("model", {}).get("default", "") + + result = { + "base_url": runtime.get("base_url", ""), + "api_key": runtime.get("api_key", ""), + "model": runtime.get("model", model_name), + "provider": runtime.get("provider", ""), + } + + result["fallback"] = _resolve_fallback_config(config) + result["multimodal"] = _resolve_multimodal_config(config) + + return result + except Exception: + logger.warning("Failed to resolve runtime provider", exc_info=True) + return {} + + +def _resolve_fallback_config(config: dict = None) -> dict: + """Resolve fallback text model from config.yaml fallback_providers.""" + try: + from hermes_cli.runtime_provider import resolve_runtime_provider + + if config is None: + from hermes_cli.config import load_config + config = load_config() + + for fb in config.get("fallback_providers", []): + fb_provider = (fb.get("provider") or "").strip() + fb_model = (fb.get("model") or "").strip() + if not fb_provider: + continue + try: + rt = resolve_runtime_provider(requested=fb_provider) + base_url = rt.get("base_url", "") + api_key = rt.get("api_key", "") + if base_url and fb_model: + return { + "base_url": base_url, + "api_key": api_key, + "model": fb_model, + "provider": rt.get("provider", ""), + } + except Exception: + pass + + for cp in config.get("custom_providers", []): + base_url = (cp.get("base_url") or cp.get("api", "")).strip() + if base_url and ("localhost" in base_url or "127.0.0.1" in base_url): + model = (cp.get("model") or "").strip() + if not model: + model = 
_detect_local_model( + base_url, + (cp.get("api_key") or "").strip(), + ) + if model and "gemma-4-26b" not in model.lower(): + return { + "base_url": base_url.rstrip("/"), + "api_key": (cp.get("api_key") or "").strip(), + "model": model, + "provider": "custom", + } + + return {} + except Exception: + logger.warning("Failed to resolve fallback config", exc_info=True) + return {} + + +def _resolve_multimodal_config(config: dict = None) -> dict: + """Resolve multimodal (vision) model config.""" + try: + from hermes_cli.runtime_provider import resolve_runtime_provider + + if config is None: + from hermes_cli.config import load_config + config = load_config() + + aux = config.get("auxiliary", {}) + vision_cfg = aux.get("vision", {}) + vision_provider = (vision_cfg.get("provider") or "").strip().lower() + if vision_provider and vision_provider != "auto": + try: + rt = resolve_runtime_provider(requested=vision_provider) + if rt.get("base_url"): + return { + "base_url": rt.get("base_url", ""), + "api_key": rt.get("api_key", ""), + "model": vision_cfg.get("model") or rt.get("model", ""), + "provider": rt.get("provider", ""), + } + except Exception: + pass + + for cp in config.get("custom_providers", []): + base_url = (cp.get("base_url") or cp.get("api", "")).strip() + if base_url and ("localhost" in base_url or "127.0.0.1" in base_url): + api_key = (cp.get("api_key") or "").strip() + key_env = (cp.get("key_env") or "").strip() + if not api_key and key_env: + import os + api_key = os.getenv(key_env, "") + model = (cp.get("model") or "").strip() + if not model: + model = _detect_local_model(base_url, api_key) + if model: + return { + "base_url": base_url.rstrip("/"), + "api_key": api_key, + "model": model, + "provider": "custom", + } + + return {} + except Exception: + logger.warning("Failed to resolve multimodal config", exc_info=True) + return {} + + +# ── Failover State (thread-safe) ────────────────────────────────────────── + +_active_model: str = "primary" 
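+# Failover usage (illustrative sketch only — `call_chat` is a hypothetical
+# placeholder, not part of this module). Callers are expected to pair
+# get_active_text_config() with switch_to_fallback() roughly like this:
+#
+#     cfg, is_fallback = get_active_text_config(resolve_config())
+#     try:
+#         reply = call_chat(cfg, messages)
+#     except Exception:
+#         if not is_fallback:
+#             switch_to_fallback()
+#         raise
+#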
+_last_health_check: float = 0.0
+_HEALTH_CHECK_INTERVAL: int = 1800  # 30 minutes
+_failover_lock = threading.Lock()
+
+
+def _check_primary_health(config: dict) -> bool:
+    """Quick health check: send a minimal request to the primary model."""
+    try:
+        import requests
+        base_url = config.get("base_url", "")
+        api_key = config.get("api_key", "")
+        model = config.get("model", "")
+        if not base_url or not model:
+            return False
+        resp = requests.post(
+            f"{base_url.rstrip('/')}/chat/completions",
+            headers={
+                "Authorization": f"Bearer {api_key}",
+                "Content-Type": "application/json",
+            },
+            json={
+                "model": model,
+                "messages": [{"role": "user", "content": "OK"}],
+                "max_tokens": 2,
+            },
+            timeout=15,
+        )
+        return resp.status_code == 200
+    except Exception:
+        return False
+
+
+def get_active_text_config(config: dict) -> tuple:
+    """Return (active_config_dict, is_fallback) based on failover state."""
+    global _active_model, _last_health_check
+
+    with _failover_lock:
+        now = time.time()
+
+        if _active_model == "fallback":
+            if now - _last_health_check >= _HEALTH_CHECK_INTERVAL:
+                _last_health_check = now
+                if _check_primary_health(config):
+                    _active_model = "primary"
+                    logger.info("Primary model recovered, switching back")
+                else:
+                    logger.info("Primary model still unavailable, staying on fallback")
+
+        fallback = config.get("fallback", {})
+        if _active_model == "primary":
+            return config, False
+        elif fallback:
+            return fallback, True
+        else:
+            return config, False
+
+
+def switch_to_fallback() -> None:
+    """Mark primary as down and switch to fallback."""
+    global _active_model, _last_health_check
+    with _failover_lock:
+        _active_model = "fallback"
+        _last_health_check = time.time()
+        logger.warning("Primary model failed, switched to fallback")
+
+
+def _detect_local_model(base_url: str, api_key: str = "") -> str:
+    """Auto-detect a served model from a local OpenAI-compatible server.
+
+    Prefers multimodal-capable model ids; the fallback text resolver reuses
+    this helper and applies its own filtering.
+    """
+    try:
+        import requests
+        headers = {}
+        if api_key:
+            headers["Authorization"] = f"Bearer {api_key}"
+        resp = requests.get(
+            f"{base_url.rstrip('/')}/models",
+            headers=headers, timeout=5,
+        )
+        if resp.ok:
+            models = resp.json().get("data", [])
+            multimodal_hints = ["gemma-4", "qwen2-vl", "qwen-vl", "llava", "pixtral", "vision"]
+            for m in models:
+                mid = m.get("id", "").lower()
+                for hint in multimodal_hints:
+                    if hint in mid:
+                        return m["id"]
+    except Exception:
+        pass
+    return ""
diff --git a/self_evolution/models.py b/self_evolution/models.py
new file mode 100644
index 0000000000..a8f184ea09
--- /dev/null
+++ b/self_evolution/models.py
@@ -0,0 +1,363 @@
+"""
+Self Evolution Plugin — Data Models
+=====================================
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional
+import json
+import time
+
+
+def _now() -> float:
+    return time.time()
+
+
+def _ts() -> str:
+    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+
+
+# ── Quality Scoring ──────────────────────────────────────────────────────
+
+@dataclass
+class QualityScore:
+    session_id: str
+    composite: float = 0.0
+    completion_rate: float = 0.0
+    efficiency_score: float = 0.0
+    cost_efficiency: float = 0.0
+    satisfaction_proxy: float = 0.0
+    task_category: str = ""
+    model: str = ""
+    created_at: float = field(default_factory=_now)
+
+    def to_db_row(self) -> dict:
+        return {
+            "session_id": self.session_id,
+            "composite_score": self.composite,
+            "completion_rate": self.completion_rate,
+            "efficiency_score": self.efficiency_score,
+            "cost_efficiency": self.cost_efficiency,
+            "satisfaction_proxy": self.satisfaction_proxy,
+            "task_category": self.task_category,
+            "model": self.model,
+            "created_at": self.created_at,
+        }
+
+
+# ── Error Analysis ───────────────────────────────────────────────────────
+
+@dataclass
+class ToolFailure:
+    tool_name: str
+    error_type: str
+    count: int
+    sessions_affected: List[str] = field(default_factory=list)
+    example_session: str = ""
+
+
+@dataclass
+class RetryPattern:
+    session_id: str
+    tool_name: str
+    attempt_count: int
+    final_outcome: str  # "success" | "failure" | "abandoned"
+
+
+@dataclass
+class ErrorAnalysis:
+    tool_failures: List[ToolFailure] = field(default_factory=list)
+    retry_patterns: List[RetryPattern] = field(default_factory=list)
+    incomplete_sessions: List[str] = field(default_factory=list)
+    user_corrections: int = 0
+    correction_examples: List[str] = field(default_factory=list)
+    api_error_count: int = 0
+    api_error_types: Dict[str, int] = field(default_factory=dict)
+
+    def summary(self) -> str:
+        lines = []
+        if self.tool_failures:
+            lines.append(f"工具失败: {len(self.tool_failures)} 种工具出错")
+            for tf in self.tool_failures[:5]:
+                lines.append(f"  - {tf.tool_name}: {tf.count}次 ({tf.error_type})")
+        if self.retry_patterns:
+            retries = len(self.retry_patterns)
+            lines.append(f"重复重试: {retries} 次")
+        if self.incomplete_sessions:
+            lines.append(f"未完成session: {len(self.incomplete_sessions)} 个")
+        if self.user_corrections:
+            lines.append(f"用户纠正: {self.user_corrections} 次")
+        if self.api_error_count:
+            lines.append(f"API错误: {self.api_error_count} 次")
+        return "\n".join(lines)
+
+
+# ── Time Waste Analysis ──────────────────────────────────────────────────
+
+@dataclass
+class ToolDuration:
+    tool_name: str
+    total_duration_ms: int
+    call_count: int
+    avg_duration_ms: float
+
+
+@dataclass
+class RepeatedOperation:
+    description: str
+    count: int
+    sessions: List[str] = field(default_factory=list)
+    wasted_ms: int = 0
+
+
+@dataclass
+class WasteAnalysis:
+    slowest_tools: List[ToolDuration] = field(default_factory=list)
+    repeated_operations: List[RepeatedOperation] = field(default_factory=list)
+    inefficient_sessions: List[str] = field(default_factory=list)
+    shortcut_opportunities: List[str] = field(default_factory=list)
+
+    def summary(self) -> str:
+        lines = []
+        if self.slowest_tools:
+            lines.append("耗时最长的工具:")
+            for td in self.slowest_tools[:5]:
+                lines.append(f"  - {td.tool_name}: 平均{td.avg_duration_ms:.0f}ms ({td.call_count}次)")
+        if self.repeated_operations:
+            lines.append(f"重复操作: {len(self.repeated_operations)} 种")
+            for ro in self.repeated_operations[:5]:
+                lines.append(f"  - {ro.description}: {ro.count}次")
+        if self.inefficient_sessions:
+            lines.append(f"低效session: {len(self.inefficient_sessions)} 个")
+        if self.shortcut_opportunities:
+            lines.append(f"可优化路径: {len(self.shortcut_opportunities)} 个")
+        return "\n".join(lines)
+
+
+# ── Code Change Analysis ──────────────────────────────────────────────────
+
+@dataclass
+class CommitInfo:
+    hash_short: str
+    subject: str
+    body: str = ""
+    author: str = ""
+    timestamp: float = 0.0
+    files_changed: int = 0
+    insertions: int = 0
+    deletions: int = 0
+    file_list: List[str] = field(default_factory=list)
+
+
+@dataclass
+class CodeChangeAnalysis:
+    commits: List[CommitInfo] = field(default_factory=list)
+    total_commits: int = 0
+    total_insertions: int = 0
+    total_deletions: int = 0
+    total_files_changed: int = 0
+    authors: List[str] = field(default_factory=list)
+    change_categories: Dict[str, int] = field(default_factory=dict)
+    areas_touched: List[str] = field(default_factory=list)
+
+    def summary(self) -> str:
+        if not self.commits:
+            return "代码更新: 无新提交"
+        lines = [
+            f"代码更新: {self.total_commits} commits, "
+            f"+{self.total_insertions}/-{self.total_deletions} lines, "
+            f"{self.total_files_changed} files changed",
+        ]
+        if self.change_categories:
+            cats = ", ".join(f"{k}: {v}" for k, v in self.change_categories.items())
+            lines.append(f"提交类型分布: {cats}")
+        if self.areas_touched:
+            lines.append(f"涉及模块: {', '.join(self.areas_touched)}")
+        lines.append("主要变更:")
+        for c in self.commits[:8]:
+            lines.append(f"  - {c.subject} ({c.hash_short}, +{c.insertions}/-{c.deletions})")
+        return "\n".join(lines)
+
+
+# ── Reflection Report ────────────────────────────────────────────────────
+
+@dataclass
+class ReflectionReport:
+    period_start: float
+    period_end: float
+    sessions_analyzed: int = 0
+    avg_score: float = 0.0
+    error_summary: str = ""
+    waste_summary: str = ""
+    worst_patterns: List[str] = field(default_factory=list)
+    best_patterns: List[str] = field(default_factory=list)
+    tool_insights: Dict[str, Dict[str, Any]] = field(default_factory=dict)
+    recommendations: List[str] = field(default_factory=list)
+    code_change_summary: str = ""
+    model_used: str = ""
+    created_at: float = field(default_factory=_now)
+
+    def to_db_row(self) -> dict:
+        return {
+            "period_start": self.period_start,
+            "period_end": self.period_end,
+            "sessions_analyzed": self.sessions_analyzed,
+            "avg_score": self.avg_score,
+            "error_summary": self.error_summary,
+            "waste_summary": self.waste_summary,
+            "worst_patterns": json.dumps(self.worst_patterns, ensure_ascii=False),
+            "best_patterns": json.dumps(self.best_patterns, ensure_ascii=False),
+            "tool_insights": json.dumps(self.tool_insights, ensure_ascii=False),
+            "recommendations": json.dumps(self.recommendations, ensure_ascii=False),
+            "code_change_summary": self.code_change_summary,
+            "model_used": self.model_used,
+            "created_at": self.created_at,
+        }
+
+
+# ── Evolution Proposal ───────────────────────────────────────────────────
+
+@dataclass
+class Proposal:
+    id: str
+    proposal_type: str  # skill | strategy | memory | tool_preference | code_improvement
+    title: str
+    description: str
+    expected_impact: str = ""
+    risk_assessment: str = "low"
+    rollback_plan: str = ""
+    status: str = "pending_approval"
+    report_id: Optional[int] = None
+    user_feedback: str = ""
+    created_at: float = field(default_factory=_now)
+    resolved_at: Optional[float] = None
+
+    def to_db_row(self) -> dict:
+        return {
+            "id": self.id,
+            "report_id": self.report_id,
+            "proposal_type": self.proposal_type,
+            "title": self.title,
+            "description": self.description,
+            "expected_impact": self.expected_impact,
+            "risk_assessment": self.risk_assessment,
+            "rollback_plan": self.rollback_plan,
+            "status": self.status,
+            "user_feedback": self.user_feedback,
+            "created_at": self.created_at,
+            "resolved_at": self.resolved_at,
+        }
+
+
+# ── Improvement Unit (A/B Test Tracking) ─────────────────────────────────
+
+@dataclass
+class ImprovementUnit:
+    id: str
+    proposal_id: str
+    change_type: str
+    version: int = 0
+    baseline_score: float = 0.0
+    current_score: float = 0.0
+    sessions_sampled: int = 0
+    min_sessions: int = 10
+    min_improvement: float = 0.05
+    max_regression: float = 0.10
+    status: str = "active"  # active | promoted | reverted
+    created_at: float = field(default_factory=_now)
+    resolved_at: Optional[float] = None
+
+    @property
+    def should_revert(self) -> bool:
+        return (
+            self.sessions_sampled >= 3
+            and (self.baseline_score - self.current_score) > self.max_regression
+        )
+
+    @property
+    def should_promote(self) -> bool:
+        return (
+            self.sessions_sampled >= self.min_sessions
+            and (self.current_score - self.baseline_score) >= self.min_improvement
+        )
+
+    def to_db_row(self) -> dict:
+        return {
+            "id": self.id,
+            "proposal_id": self.proposal_id,
+            "change_type": self.change_type,
+            "version": self.version,
+            "baseline_score": self.baseline_score,
+            "current_score": self.current_score,
+            "sessions_sampled": self.sessions_sampled,
+            "min_sessions": self.min_sessions,
+            "min_improvement": self.min_improvement,
+            "max_regression": self.max_regression,
+            "status": self.status,
+            "created_at": self.created_at,
+            "resolved_at": self.resolved_at,
+        }
+
+
+# ── Strategy Rule ────────────────────────────────────────────────────────
+
+@dataclass
+class StrategyCondition:
+    field: str
+    operator: str  # regex_match | contains | equals | not_contains
+    pattern: str
+
+
+@dataclass
+class StrategyRule:
+    id: str
+    name: str
+    strategy_type: str  # hint | avoid | prefer
+    description: str
+    conditions: List[StrategyCondition] = field(default_factory=list)
+    hint_text: str = ""
+    severity: str = "medium"  # high | medium | low
+    enabled: bool = True
+    version: int = 1
+    source: str = "learned"  # learned | manual | default
+    created_at: float = field(default_factory=_now)
+
+    def to_dict(self) -> dict:
+        return {
+            "id": self.id,
+            "name": self.name,
+            "strategy_type": self.strategy_type,
+            "description": self.description,
+            "conditions": [
+                {"field": c.field, "operator": c.operator, "pattern": c.pattern}
+                for c in self.conditions
+            ],
+            "hint_text": self.hint_text,
+            "severity": self.severity,
+            "enabled": self.enabled,
+            "version": self.version,
+            "source": self.source,
+            "created_at": self.created_at,
+        }
+
+    @classmethod
+    def from_dict(cls, d: dict) -> StrategyRule:
+        conditions = [
+            StrategyCondition(field=c["field"], operator=c["operator"], pattern=c["pattern"])
+            for c in d.get("conditions", [])
+        ]
+        return cls(
+            id=d["id"],
+            name=d["name"],
+            strategy_type=d.get("strategy_type", "hint"),
+            description=d.get("description", ""),
+            conditions=conditions,
+            hint_text=d.get("hint_text", ""),
+            severity=d.get("severity", "medium"),
+            enabled=d.get("enabled", True),
+            version=d.get("version", 1),
+            source=d.get("source", "learned"),
+            created_at=d.get("created_at", _now()),
+        )
diff --git a/self_evolution/paths.py b/self_evolution/paths.py
new file mode 100644
index 0000000000..0c5f7018e7
--- /dev/null
+++ b/self_evolution/paths.py
@@ -0,0 +1,17 @@
+"""
+Self Evolution Plugin — Centralized Path Definitions
+=====================================================
+
+Single source of truth for all filesystem paths used by the plugin.
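+
+Resulting layout under ~/.hermes (one entry per constant defined below):
+
+    self_evolution/evolution.db     — DB_PATH
+    self_evolution/strategies.json  — STRATEGIES_FILE
+    self_evolution/archive/         — ARCHIVE_DIR
+    skills/learned/                 — SKILLS_DIR
+    memories/                       — MEMORIES_DIR
+    cron/                           — CRON_DIR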
+"""
+
+from pathlib import Path
+
+HERMES_HOME = Path.home() / ".hermes"
+DATA_DIR = HERMES_HOME / "self_evolution"
+DB_PATH = DATA_DIR / "evolution.db"
+STRATEGIES_FILE = DATA_DIR / "strategies.json"
+ARCHIVE_DIR = DATA_DIR / "archive"
+SKILLS_DIR = HERMES_HOME / "skills" / "learned"
+MEMORIES_DIR = HERMES_HOME / "memories"
+CRON_DIR = HERMES_HOME / "cron"
diff --git a/self_evolution/plugin.yaml b/self_evolution/plugin.yaml
new file mode 100644
index 0000000000..3b6cb33355
--- /dev/null
+++ b/self_evolution/plugin.yaml
@@ -0,0 +1,7 @@
+name: self_evolution
+version: "1.0.0"
+description: "Agent self-optimization and continuous evolution — dream consolidation at 1:00, Feishu approval at 19:00"
+provides_hooks:
+  - post_tool_call
+  - on_session_end
+  - pre_llm_call
diff --git a/self_evolution/prompts/reflection.md b/self_evolution/prompts/reflection.md
new file mode 100644
index 0000000000..f5a10792df
--- /dev/null
+++ b/self_evolution/prompts/reflection.md
@@ -0,0 +1,7 @@
+## 概况
+- 时段: {period_range}
+- Session 数: {sessions_count}, 平均质量: {avg_score}
+- 工具调用: {total_invocations} 次, 成功率 {success_rate}%
+
+## 数据
+{data_json}
diff --git a/self_evolution/quality_scorer.py b/self_evolution/quality_scorer.py
new file mode 100644
index 0000000000..4cb0de00c5
--- /dev/null
+++ b/self_evolution/quality_scorer.py
@@ -0,0 +1,177 @@
+"""
+Self Evolution Plugin — Quality Scorer
+=======================================
+
+Computes a composite quality score for each session:
+
+    session_quality = 0.4  * completion_rate
+                    + 0.2  * efficiency_score
+                    + 0.15 * cost_efficiency
+                    + 0.25 * satisfaction_proxy
+
+Zero API cost — pure computation from already-collected session data.
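+
+Worked example (hypothetical component values): completion_rate = 1.0,
+efficiency_score = 0.8, cost_efficiency = 0.9, satisfaction_proxy = 0.75:
+
+    0.4*1.0 + 0.2*0.8 + 0.15*0.9 + 0.25*0.75 = 0.8825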
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any, Dict
+
+from self_evolution.models import QualityScore
+
+logger = logging.getLogger(__name__)
+
+# ── Weights ──────────────────────────────────────────────────────────────
+
+W_COMPLETION = 0.40
+W_EFFICIENCY = 0.20
+W_COST = 0.15
+W_SATISFACTION = 0.25
+
+# Ideal iteration counts by task complexity
+IDEAL_ITERATIONS = {
+    "simple": 3,
+    "medium": 8,
+    "complex": 15,
+}
+DEFAULT_IDEAL_ITERATIONS = 8
+
+
+def compute_score(session_data: dict) -> QualityScore:
+    """Compute a composite quality score from session data.
+
+    Args:
+        session_data: dict with keys like:
+            - completed, interrupted, partial
+            - iterations, max_iterations
+            - tool_call_count, message_count
+            - input_tokens, output_tokens, estimated_cost_usd
+            - duration_seconds
+            - model, platform
+            - messages (list)
+
+    Returns:
+        QualityScore with individual and composite scores.
+    """
+    session_id = session_data.get("session_id", "")
+
+    completion = _completion_rate(session_data)
+    efficiency = _efficiency_score(session_data)
+    cost = _cost_efficiency(session_data)
+    satisfaction = _satisfaction_proxy(session_data)
+
+    composite = (
+        W_COMPLETION * completion
+        + W_EFFICIENCY * efficiency
+        + W_COST * cost
+        + W_SATISFACTION * satisfaction
+    )
+
+    return QualityScore(
+        session_id=session_id,
+        composite=round(composite, 3),
+        completion_rate=round(completion, 3),
+        efficiency_score=round(efficiency, 3),
+        cost_efficiency=round(cost, 3),
+        satisfaction_proxy=round(satisfaction, 3),
+        task_category=_detect_task_category(session_data),
+        model=session_data.get("model", ""),
+    )
+
+
+# ── Individual Score Components ──────────────────────────────────────────
+
+def _completion_rate(session_data: dict) -> float:
+    """1.0 if completed, 0.5 if interrupted, 0.3 if partial, 0.0 otherwise."""
+    if session_data.get("completed"):
+        return 1.0
+    if session_data.get("interrupted"):
+        return 0.5
+    if session_data.get("partial"):
+        return 0.3
+    return 0.0
+
+
+def _efficiency_score(session_data: dict) -> float:
+    """Ideal iterations / actual iterations, capped at 1.0."""
+    iterations = session_data.get("iterations", 0) or session_data.get("tool_call_count", 0)
+    if iterations <= 0:
+        return 1.0
+
+    category = _detect_task_category(session_data)
+    ideal = IDEAL_ITERATIONS.get(category, DEFAULT_IDEAL_ITERATIONS)
+
+    return min(1.0, ideal / max(iterations, 1))
+
+
+def _cost_efficiency(session_data: dict) -> float:
+    """Expected tool calls / actual tool calls, capped at 1.0.
+
+    Uses message count as a proxy for expected work (roughly two tool
+    calls per user message).
+    """
+    messages = session_data.get("message_count", 1) or 1
+    tool_calls = session_data.get("tool_call_count", 0) or 0
+
+    # Expected: roughly 2 tool calls per user message
+    expected_tool_calls = messages * 2
+
+    if expected_tool_calls <= 0:
+        return 1.0
+
+    return min(1.0, expected_tool_calls / max(tool_calls, 1))
+
+
+def _satisfaction_proxy(session_data: dict) -> float:
+    """Estimate satisfaction from behavioral signals.
+
+    Signals:
+        - Single-turn session (user got what they needed) = high
+        - Multi-turn but completed = medium-high
+        - User corrections detected = lower
+        - Budget exhausted = low
+    """
+    messages = session_data.get("message_count", 1) or 1
+    completed = session_data.get("completed", False)
+    max_iterations = session_data.get("max_iterations", 0)
+    iterations = session_data.get("iterations", 0)
+
+    score = 0.7  # baseline
+
+    # Single-turn completion is a strong positive signal
+    if messages <= 2 and completed:
+        score = 0.9
+    elif completed:
+        score = 0.75
+    elif messages > 10:
+        score = 0.5
+
+    # Budget exhaustion is a negative signal
+    if max_iterations and iterations >= max_iterations:
+        score -= 0.2
+
+    return max(0.0, min(1.0, score))
+
+
+# ── Task Category Detection ──────────────────────────────────────────────
+
+def _detect_task_category(session_data: dict) -> str:
+    """Detect task category from tool usage patterns."""
+    tool_names = session_data.get("tool_names", [])
+    if isinstance(tool_names, str):
+        tool_names = tool_names.split(",")
+
+    tool_set = set(t.lower() for t in tool_names) if tool_names else set()
+
+    coding_tools = {"terminal", "bash", "write", "edit", "file_write", "file_edit"}
+    web_tools = {"web_search", "browser", "browser_navigate", "scrape", "fetch"}
+    file_tools = {"read", "file_read", "grep", "glob", "find"}
+
+    if tool_set & coding_tools:
+        return "coding"
+    if tool_set & web_tools:
+        return "web_research"
+    if tool_set & file_tools:
+        return "file_analysis"
+
+    return "general"
diff --git a/self_evolution/reflection_engine.py b/self_evolution/reflection_engine.py
new file mode 100644
index 0000000000..17269fea0d
--- /dev/null
+++ b/self_evolution/reflection_engine.py
@@ -0,0 +1,751 @@
+"""
+Self Evolution Plugin — Dream Engine (Reflection Engine)
+=========================================================
+
+Runs nightly at 1:00 to analyze the previous day's sessions.
+
+Design reference: Claude Code plugins/hookify/agents/conversation-analyzer.md
+    - Analyzes conversations in reverse chronological order
+    - Detects: corrections, frustrations, repeated issues, reversions
+    - Extracts tool usage patterns, converts to actionable rules
+    - Categorizes by severity
+
+We extend this pattern with:
+    - Fully automated analysis (not just on user request)
+    - Error analysis (tool failures, retries, API errors)
+    - Time waste analysis (slow tools, repeated ops, inefficient sessions)
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import re
+import time
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+from self_evolution import db
+from self_evolution.model_config import resolve_config, get_active_text_config, switch_to_fallback
+from self_evolution.git_analyzer import analyze_code_changes
+from self_evolution.models import (
+    ErrorAnalysis, ToolFailure, RetryPattern,
+    WasteAnalysis, ToolDuration, RepeatedOperation,
+    CodeChangeAnalysis, CommitInfo,
+    ReflectionReport,
+)
+
+logger = logging.getLogger(__name__)
+
+
+# ── Backward-compatible aliases ───────────────────────────────────────────
+# These are used by cron_jobs.py and other callers.
+_resolve_runtime_config = resolve_config
+_get_active_text_config = get_active_text_config
+_switch_to_fallback = switch_to_fallback
+
+
+class DreamEngine:
+    """Nightly dream consolidation engine.
+
+    Analyzes the previous day's sessions to find:
+        1. Error patterns (tool failures, retries, incomplete tasks)
+        2. Time waste patterns (slow tools, repeated operations, inefficient flows)
+        3. Success patterns (what worked well)
+    and then generates actionable evolution proposals.
+    """
+
+    def __init__(self, config: Optional[dict] = None):
+        self.config = config or _resolve_runtime_config()
+        self._model_client = None
+        self._current_prompt = ""
+
+    def run(self, hours: int = 24, max_runtime_seconds: int = 0) -> Optional[ReflectionReport]:
+        """Main dream consolidation flow.
+
+        Args:
+            hours: Analyze data from the last N hours.
+            max_runtime_seconds: Hard timeout in seconds. 0 = no limit.
+                If exceeded, stops at the next step boundary and returns None.
+        """
+        logger.info("Dream engine starting — analyzing last %d hours", hours)
+
+        deadline = time.time() + max_runtime_seconds if max_runtime_seconds > 0 else 0
+
+        now = time.time()
+        cutoff = now - (hours * 3600)
+
+        try:
+            # 1. Load session data
+            scores = db.fetch_all(
+                "session_scores",
+                where="created_at >= ?",
+                params=(cutoff,),
+                order_by="created_at DESC",
+            )
+            tool_invocations = db.fetch_all(
+                "tool_invocations",
+                where="created_at >= ?",
+                params=(cutoff,),
+                order_by="created_at DESC",
+            )
+            signals = db.fetch_all(
+                "outcome_signals",
+                where="created_at >= ?",
+                params=(cutoff,),
+            )
+
+            if not scores:
+                logger.info("No sessions to analyze")
+                return None
+
+            # 2. Error analysis
+            if deadline and time.time() > deadline:
+                logger.warning("Dream engine timed out before error analysis")
+                return None
+            error_analysis = self._analyze_errors(scores, tool_invocations, signals)
+            logger.info("Error analysis: %s", error_analysis.summary())
+
+            # 3. Time waste analysis
+            if deadline and time.time() > deadline:
+                logger.warning("Dream engine timed out before waste analysis")
+                return None
+            waste_analysis = self._analyze_time_waste(scores, tool_invocations)
+            logger.info("Waste analysis: %s", waste_analysis.summary())
+
+            # 3.5. Code change analysis
+            if deadline and time.time() > deadline:
+                logger.warning("Dream engine timed out before code analysis")
+                return None
+            code_analysis = analyze_code_changes(hours=hours)
+            logger.info("Code change analysis: %d commits found", code_analysis.total_commits)
+
+            # 4. Compute average score
+            avg_score = (
+                sum(s.get("composite_score", 0) for s in scores) / len(scores)
+                if scores else 0
+            )
+
+            # 5. Build reflection prompt
+            if deadline and time.time() > deadline:
+                logger.warning("Dream engine timed out before model call")
+                return None
+            prompt = self._build_reflection_prompt(
+                scores, tool_invocations, signals,
+                error_analysis, waste_analysis, avg_score,
+                code_analysis=code_analysis,
+            )
+
+            # 6. Call model for deep reflection
+            reflection_text = self._call_model(prompt)
+            if not reflection_text:
+                logger.warning("Model returned empty reflection")
+                return None
+
+            # 7. Parse reflection report
+            report = self._parse_reflection(
+                reflection_text=reflection_text,
+                period_start=cutoff,
+                period_end=now,
+                sessions_analyzed=len(scores),
+                avg_score=avg_score,
+                error_analysis=error_analysis,
+                waste_analysis=waste_analysis,
+                code_analysis=code_analysis,
+            )
+
+            # 8. Store report
+            report_id = db.insert("reflection_reports", report.to_db_row())
+            logger.info("Reflection report saved: id=%d, avg_score=%.3f", report_id, avg_score)
+
+            # 9. Generate evolution proposals
+            from self_evolution.evolution_proposer import generate_proposals
+            proposals = generate_proposals(report, report_id)
+            for p in proposals:
+                db.insert("evolution_proposals", p.to_db_row())
+            logger.info("Generated %d evolution proposals", len(proposals))
+
+            # 10. Compress existing strategies
+            try:
+                from self_evolution.strategy_compressor import compress_strategies
+                from self_evolution.strategy_store import StrategyStore
+                store = StrategyStore()
+                data = store.load()
+                rules = data.get("rules", [])
+                compressed = compress_strategies(rules)
+                if len(compressed) < len(rules):
+                    data["rules"] = compressed
+                    store.save(data)
+                    logger.info("Strategies compressed: %d → %d", len(rules), len(compressed))
+            except Exception as exc:
+                logger.warning("Strategy compression failed: %s", exc)
+
+            # 11. Cleanup old data
+            db.cleanup(days=30)
+
+            return report
+
+        except Exception as exc:
+            logger.exception("Dream engine failed: %s", exc)
+            return None
+
+    # ── Error Analysis ────────────────────────────────────────────────────
+
+    def _analyze_errors(
+        self,
+        scores: List[dict],
+        invocations: List[dict],
+        signals: List[dict],
+    ) -> ErrorAnalysis:
+        """Analyze all errors in the period.
+
+        Inspired by Claude Code conversation-analyzer's signal detection.
+        """
+        # Tool failures
+        failures = {}
+        for inv in invocations:
+            if not inv.get("success", True):
+                tool = inv.get("tool_name", "unknown")
+                error_type = inv.get("error_type", "unknown")
+                key = f"{tool}:{error_type}"
+                if key not in failures:
+                    failures[key] = ToolFailure(
+                        tool_name=tool,
+                        error_type=error_type,
+                        count=0,
+                        sessions_affected=[],
+                        example_session=inv.get("session_id", ""),
+                    )
+                failures[key].count += 1
+                sid = inv.get("session_id", "")
+                if sid and sid not in failures[key].sessions_affected:
+                    failures[key].sessions_affected.append(sid)
+
+        # Retry patterns (same tool called > 2 times in same session)
+        retries = self._detect_retry_patterns(invocations)
+
+        # Incomplete sessions
+        incomplete = [
+            s.get("session_id", "") for s in scores
+            if s.get("completion_rate", 1.0) < 0.5
+        ]
+
+        # User corrections from signals
+        corrections = [s for s in signals if s.get("signal_type") == "correction"]
+        api_errors = [s for s in signals if s.get("signal_type") == "api_error"]
+
+        # API error type distribution (metadata may not be valid JSON — guard)
+        api_error_types: Dict[str, int] = {}
+        for s in api_errors:
+            try:
+                meta = json.loads(s.get("metadata") or "{}")
+            except (TypeError, ValueError):
+                meta = {}
+            etype = meta.get("error_type", "unknown")
+            api_error_types[etype] = api_error_types.get(etype, 0) + 1
+
+        return ErrorAnalysis(
+            tool_failures=sorted(failures.values(), key=lambda x: x.count, reverse=True),
+            retry_patterns=retries,
+            incomplete_sessions=incomplete,
+            user_corrections=len(corrections),
+            correction_examples=[s.get("metadata", "") for s in corrections[:3]],
+            api_error_count=len(api_errors),
+            api_error_types=api_error_types,
+        )
+
+    def _detect_retry_patterns(self, invocations: List[dict]) -> List[RetryPattern]:
+        """Detect tools called > 2 times in the same session."""
+        session_tools: Dict[str, Dict[str, int]] = {}
+        for inv in invocations:
+            sid = inv.get("session_id", "")
+            tool = inv.get("tool_name", "")
+            if sid not in session_tools:
+                session_tools[sid] = {}
+            session_tools[sid][tool] = session_tools[sid].get(tool, 0) + 1
+
+        patterns = []
+        for sid, tools in session_tools.items():
+            for tool, count in tools.items():
+                if count > 2:
+                    patterns.append(RetryPattern(
+                        session_id=sid,
+                        tool_name=tool,
+                        attempt_count=count,
+                        final_outcome="unknown",
+                    ))
+        return sorted(patterns, key=lambda x: x.attempt_count, reverse=True)[:20]
+
+    # ── Time Waste Analysis ───────────────────────────────────────────────
+
+    def _analyze_time_waste(
+        self,
+        scores: List[dict],
+        invocations: List[dict],
+    ) -> WasteAnalysis:
+        """Analyze time waste patterns."""
+        # Slowest tools
+        tool_durations: Dict[str, List[int]] = {}
+        for inv in invocations:
+            tool = inv.get("tool_name", "")
+            duration = inv.get("duration_ms", 0)
+            if not duration:
+                continue
+            if tool not in tool_durations:
+                tool_durations[tool] = []
+            tool_durations[tool].append(duration)
+
+        slowest = [
+            ToolDuration(
+                tool_name=tool,
+                total_duration_ms=sum(durs),
+                call_count=len(durs),
+                avg_duration_ms=sum(durs) / len(durs),
+            )
+            for tool, durs in tool_durations.items()
+        ]
+        slowest.sort(key=lambda x: x.avg_duration_ms, reverse=True)
+
+        # Repeated operations (same tool + same session > 3 times)
+        session_tool_calls: Dict[str, Dict[str, int]] = {}
+        for inv in invocations:
+            sid = inv.get("session_id", "")
+            tool = inv.get("tool_name", "")
+            if sid not in session_tool_calls:
+                session_tool_calls[sid] = {}
+            session_tool_calls[sid][tool] = session_tool_calls[sid].get(tool, 0) + 1
+
+        repeated = []
+        for sid, tools in session_tool_calls.items():
+            for tool, count in tools.items():
+                if count > 3:
+                    repeated.append(RepeatedOperation(
+                        description=f"{tool} called {count} times",
+                        count=count,
+                        sessions=[sid],
+                        # rough estimate: first observed duration × extra calls
+                        wasted_ms=tool_durations.get(tool, [0])[0] * (count - 2) if tool in tool_durations else 0,
+                    ))
+
+        # Inefficient sessions (low efficiency score)
+        inefficient = [
+            s.get("session_id", "") for s in scores
+            if s.get("efficiency_score", 1.0) < 0.3
+        ]
+
+        return WasteAnalysis(
+            slowest_tools=slowest[:10],
+            repeated_operations=sorted(repeated, key=lambda x: x.count, reverse=True)[:10],
+            inefficient_sessions=inefficient,
+            shortcut_opportunities=[],
+        )
+
+    # ── Reflection Prompt ─────────────────────────────────────────────────
+
+    def _build_reflection_prompt(
+        self,
+        scores: List[dict],
+        invocations: List[dict],
+        signals: List[dict],
+        errors: ErrorAnalysis,
+        waste: WasteAnalysis,
+        avg_score: float,
+        code_analysis: Optional[CodeChangeAnalysis] = None,
+    ) -> str:
+        """Build the reflection prompt as structured JSON data.
+
+        All analysis results are serialized as JSON so the model receives
+        lossless data instead of pre-summarized text.
+        """
+        # Load user prompt template (short: just overview + data placeholder)
+        template_path = Path(__file__).parent / "prompts" / "reflection.md"
+        if template_path.exists():
+            template = template_path.read_text(encoding="utf-8")
+        else:
+            template = _DEFAULT_REFLECTION_PROMPT
+
+        # Compute statistics
+        total_invocations = len(invocations)
+        success_rate = (
+            sum(1 for i in invocations if i.get("success", True)) / total_invocations * 100
+            if total_invocations else 100
+        )
+
+        # Period range
+        if scores:
+            ts_min = min(s.get("created_at", 0) for s in scores)
+            ts_max = max(s.get("created_at", 0) for s in scores)
+            period_range = (
+                f"{time.strftime('%m-%d %H:%M', time.localtime(ts_min))} ~ "
+                f"{time.strftime('%m-%d %H:%M', time.localtime(ts_max))}"
+            )
+        else:
+            period_range = "N/A"
+
+        # Build structured data JSON — compact format to save tokens
+        data = {}
+
+        # 1. Sessions — compact: [score, completion, efficiency, cost, satisfaction, category]
+        data["sessions"] = [
+            [
+                round(s.get("composite_score", 0), 2),
+                round(s.get("completion_rate", 0), 2),
+                round(s.get("efficiency_score", 0), 2),
+                round(s.get("cost_efficiency", 0), 2),
+                round(s.get("satisfaction_proxy", 0), 2),
+                s.get("task_category", ""),
+            ]
+            for s in scores
+        ]
+
+        # 2. Tool usage — compact: {tool: [calls, failures, avg_ms]}
+        tool_stats: Dict[str, List[int]] = {}
+        for inv in invocations:
+            tool = inv.get("tool_name", "")
+            if tool not in tool_stats:
+                tool_stats[tool] = [0, 0, 0]  # calls, failures, total_ms
+            tool_stats[tool][0] += 1
+            if not inv.get("success", True):
+                tool_stats[tool][1] += 1
+            tool_stats[tool][2] += inv.get("duration_ms", 0) or 0
+        data["tools"] = {
+            t: [v[0], v[1], round(v[2] / max(v[0], 1))]
+            for t, v in sorted(tool_stats.items(), key=lambda x: x[1][2], reverse=True)
+        }
+
+        # 3. Signals — compact: {type: count}
+        signal_types = {}
+        for s in signals:
+            stype = s.get("signal_type", "unknown")
+            signal_types[stype] = signal_types.get(stype, 0) + 1
+        data["signals"] = signal_types
+
+        # 4. Errors — only non-empty fields
+        err_data = {}
+        if errors.tool_failures:
+            err_data["tool_failures"] = [
+                f"{tf.tool_name}:{tf.error_type}x{tf.count}"
+                for tf in errors.tool_failures
+            ]
+        if errors.retry_patterns:
+            err_data["retries"] = [
+                f"{rp.tool_name}x{rp.attempt_count}"
+                for rp in errors.retry_patterns[:5]
+            ]
+        if errors.incomplete_sessions:
+            err_data["incomplete"] = len(errors.incomplete_sessions)
+        if errors.user_corrections:
+            err_data["corrections"] = errors.user_corrections
+        if errors.correction_examples:
+            err_data["correction_examples"] = errors.correction_examples[:2]
+        if errors.api_error_count:
+            err_data["api_errors"] = errors.api_error_count
+        if err_data:
+            data["errors"] = err_data
+
+        # 5. Waste — only non-empty
+        waste_data = {}
+        if waste.slowest_tools:
+            waste_data["slowest"] = [
+                f"{td.tool_name} {round(td.avg_duration_ms)}ms/{td.call_count}calls"
+                for td in waste.slowest_tools[:5]
+            ]
+        if waste.repeated_operations:
+            waste_data["repeated"] = [
+                f"{ro.description} x{ro.count}"
+                for ro in waste.repeated_operations[:3]
+            ]
+        if waste.inefficient_sessions:
+            waste_data["inefficient"] = len(waste.inefficient_sessions)
+        if waste_data:
+            data["waste"] = waste_data
+
+        # 6. 
Code changes — flat compact format + if code_analysis and code_analysis.commits: + cc = code_analysis + commits_data = [] + for c in cc.commits[:10]: + entry = f"{c.hash_short} {c.subject} +{c.insertions}/-{c.deletions}" + if c.file_list: + entry += f" [{','.join(c.file_list[:5])}]" + if c.body: + entry += f" | {c.body[:150]}" + commits_data.append(entry) + data["code_changes"] = { + "stats": f"{cc.total_commits} commits +{cc.total_insertions}/-{cc.total_deletions} lines {cc.total_files_changed} files", + "categories": cc.change_categories, + "areas": cc.areas_touched, + "commits": commits_data, + } + + data_json = json.dumps(data, ensure_ascii=False, indent=2) + + # Fill template + prompt = template.replace("{period_range}", period_range) + prompt = prompt.replace("{sessions_count}", str(len(scores))) + prompt = prompt.replace("{avg_score}", f"{avg_score:.3f}") + prompt = prompt.replace("{total_invocations}", str(total_invocations)) + prompt = prompt.replace("{success_rate}", f"{success_rate:.1f}") + prompt = prompt.replace("{data_json}", data_json) + + return prompt + + # ── Model Call ──────────────────────────────────────────────────────── + + def _call_model(self, prompt: str) -> Optional[str]: + """Call the active model with automatic failover. + + Resolution order: + 1. Primary model (glm-5.1 via zai) + 2. Fallback model (Qwen3.6 via local) — if primary fails + Health check: when on fallback, probes primary every 30 min + and switches back when it recovers. 
+ """ + self._current_prompt = prompt + + active_cfg, is_fallback = _get_active_text_config(self.config) + base_url = active_cfg.get("base_url", "") + api_key = active_cfg.get("api_key", "") + model = active_cfg.get("model", "") + + if not base_url or not model: + logger.warning("Incomplete runtime config: base_url=%s model=%s", + bool(base_url), model) + return None + + result = self._call_chat_completions(base_url, api_key, model) + + # If primary failed, try fallback + if result is None and not is_fallback: + fallback = self.config.get("fallback", {}) + if fallback.get("base_url") and fallback.get("model"): + logger.warning("Primary model failed, trying fallback: %s", + fallback.get("model")) + result = self._call_chat_completions( + fallback["base_url"], fallback.get("api_key", ""), + fallback["model"], + ) + if result is not None: + _switch_to_fallback() + + return result + + def _call_chat_completions( + self, base_url: str, api_key: str, model: str, + ) -> Optional[str]: + """Call OpenAI-compatible /chat/completions endpoint.""" + try: + import requests + url = f"{base_url.rstrip('/')}/chat/completions" + headers = {"Content-Type": "application/json"} + if api_key: + headers["Authorization"] = f"Bearer {api_key}" + + resp = requests.post( + url, + headers=headers, + json={ + "model": model, + "messages": [ + {"role": "system", "content": _SYSTEM_PROMPT}, + {"role": "user", "content": self._current_prompt or ""}, + ], + "temperature": 0.3, + }, + timeout=300, + ) + if resp.status_code == 200: + data = resp.json() + return data.get("choices", [{}])[0].get("message", {}).get("content", "") + else: + logger.debug("Model call failed: %d %s", resp.status_code, resp.text[:200]) + except Exception as exc: + logger.debug("Chat completions call failed: %s", exc) + return None + + # ── Multimodal Call ─────────────────────────────────────────────────── + + def call_multimodal(self, prompt: str, images: list = None) -> Optional[str]: + """Call multimodal model with text 
and optional images. + + Routes to local multimodal model (gemma-4-26b-a4b-it-4bit) when + images are involved. Falls back to text model if no images. + + Args: + prompt: Text prompt. + images: List of image data, each item is either: + - URL string (http/https/data:image) + - bytes (raw image data, auto-encoded to base64) + + Returns: + Model response text, or None on failure. + """ + mm = self.config.get("multimodal", {}) + if not mm or not mm.get("base_url"): + logger.debug("No multimodal model configured, falling back to text") + return self._call_model(prompt) + + # Build content with images + content = [{"type": "text", "text": prompt}] + for img in (images or []): + if isinstance(img, bytes): + import base64 + b64 = base64.b64encode(img).decode() + content.append({ + "type": "image_url", + "image_url": {"url": f"data:image/png;base64,{b64}"}, + }) + elif isinstance(img, str): + content.append({ + "type": "image_url", + "image_url": {"url": img}, + }) + + try: + from openai import OpenAI + client = OpenAI( + base_url=mm["base_url"].rstrip("/") + ("/v1" if not mm["base_url"].rstrip("/").endswith("/v1") else ""), + api_key=mm.get("api_key") or "no-key", + ) + resp = client.chat.completions.create( + model=mm["model"], + messages=[{"role": "user", "content": content}], + temperature=0.3, + max_tokens=2000, + timeout=120, + ) + return resp.choices[0].message.content + except Exception as exc: + logger.debug("Multimodal call failed: %s", exc) + return None + + # ── Reflection Parsing ──────────────────────────────────────────────── + + def _parse_reflection( + self, + reflection_text: str, + period_start: float, + period_end: float, + sessions_analyzed: int, + avg_score: float, + error_analysis: ErrorAnalysis, + waste_analysis: WasteAnalysis, + code_analysis: CodeChangeAnalysis = None, + ) -> ReflectionReport: + """Parse model output into structured ReflectionReport. + + Extraction cascade: + 1. Direct JSON parse + 2. Strip markdown ```json ... 
``` wrapper, retry JSON
+        3. Extract JSON object via regex (handle trailing text)
+        4. Text-based section extraction (fallback)
+        """
+        worst_patterns = []
+        best_patterns = []
+        recommendations = []
+        tool_insights = {}
+
+        text = reflection_text.strip()
+
+        # 1. Direct JSON parse
+        data = _try_parse_json(text)
+
+        if data is None:
+            # 2. Strip markdown wrapper
+            m = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', text, re.DOTALL)
+            if m:
+                data = _try_parse_json(m.group(1))
+
+        if data is None:
+            # 3. Regex extract first flat JSON object (keys must match model output)
+            m = re.search(r'\{[^{}]*"(?:worst_patterns|best_patterns|recommendations)"[^{}]*\}', text, re.DOTALL)
+            if m:
+                data = _try_parse_json(m.group(0))
+
+        if data is None:
+            # 3.5. Broader regex — find outermost braces
+            start = text.find('{')
+            end = text.rfind('}')
+            if start != -1 and end > start:
+                data = _try_parse_json(text[start:end + 1])
+
+        if data is not None:
+            worst_patterns = data.get("worst_patterns") or []
+            best_patterns = data.get("best_patterns") or []
+            recommendations = data.get("recommendations") or []
+            tool_insights = data.get("tool_insights") or {}
+        else:
+            # 4. 
Text-based extraction
+            section = None
+            for line in text.split("\n"):
+                stripped = line.strip()
+                lower = stripped.lower()
+                if ("worst" in lower and "pattern" in lower) or "最差" in stripped or "错误模式" in stripped:
+                    section = "worst"
+                elif ("best" in lower and "pattern" in lower) or "最佳" in stripped or "成功" in stripped:
+                    section = "best"
+                elif ("recommend" in lower) or "建议" in stripped:
+                    section = "rec"
+                elif stripped.startswith("- ") or stripped.startswith("* ") or stripped.startswith("• "):
+                    item = stripped.lstrip("-*• ").strip()
+                    if section == "worst":
+                        worst_patterns.append(item)
+                    elif section == "best":
+                        best_patterns.append(item)
+                    elif section == "rec":
+                        recommendations.append(item)
+                elif re.match(r"\d+[.)]\s", stripped):
+                    item = re.sub(r"^\d+[.)]\s*", "", stripped)
+                    if section == "worst":
+                        worst_patterns.append(item)
+                    elif section == "best":
+                        best_patterns.append(item)
+                    elif section == "rec":
+                        recommendations.append(item)
+
+        return ReflectionReport(
+            period_start=period_start,
+            period_end=period_end,
+            sessions_analyzed=sessions_analyzed,
+            avg_score=avg_score,
+            error_summary=error_analysis.summary(),
+            waste_summary=waste_analysis.summary(),
+            worst_patterns=worst_patterns,
+            best_patterns=best_patterns,
+            tool_insights=tool_insights,
+            recommendations=recommendations,
+            code_change_summary=code_analysis.summary() if code_analysis else "",
+            model_used=self.config.get("model", "unknown"),
+        )
+
+
+# ── Default Prompt Template ──────────────────────────────────────────────
+
+_SYSTEM_PROMPT = (
+    "你是 Hermes Agent 性能分析引擎。分析运行数据+代码变更,输出严格JSON(无markdown)。\n"
+    "格式:\n"
+    '{"worst_patterns":["模式(工具+场景+根因)"],"best_patterns":["成功经验"],'
+    '"tool_insights":{"工具":{"sr":0.95,"ms":500,"rec":"建议"}},'
+    '"recommendations":["做什么|效果|风险(l/m/h)|验证"]}\n'
+    "重点:系统性错误>偶发,错误连锁,策略vs工具问题,重复操作,代码设计合理性,自我进化状态,"
+    "可固化流程。≤5条建议,优先高影响低风险。无数据时输出空数组。"
+)
+
+
+_DEFAULT_REFLECTION_PROMPT = """## 概况
+- 时段: {period_range}
+- 
Session 数: {sessions_count}, 平均质量: {avg_score} +- 工具调用: {total_invocations} 次, 成功率 {success_rate}% + +## 数据 +{data_json} +""" + + +def _try_parse_json(text: str) -> Optional[dict]: + """Try to parse JSON, returning None on any failure.""" + try: + data = json.loads(text) + if isinstance(data, dict): + return data + except (json.JSONDecodeError, ValueError): + pass + return None diff --git a/self_evolution/rule_engine.py b/self_evolution/rule_engine.py new file mode 100644 index 0000000000..5aad88950a --- /dev/null +++ b/self_evolution/rule_engine.py @@ -0,0 +1,101 @@ +""" +Self Evolution Plugin — Rule Engine (Strategy Matching) +======================================================== + +Conditional strategy matching engine. + +Design reference: Claude Code plugins/hookify/core/rule_engine.py + - LRU-cached regex compilation (max 128) + - Multiple operators: regex_match, contains, equals, not_contains + - All conditions must match (AND logic) + - Severity levels: high, medium, low +""" + +from __future__ import annotations + +import re +from functools import lru_cache +from typing import Any, Dict, List, Optional + +from self_evolution.models import StrategyRule, StrategyCondition + + +@lru_cache(maxsize=128) +def _compile_pattern(pattern: str) -> re.Pattern: + """Compile and cache a regex pattern.""" + return re.compile(pattern, re.IGNORECASE) + + +class StrategyRuleEngine: + """Evaluate strategy rules against session context.""" + + def match_strategies( + self, + strategies: List[StrategyRule], + context: Dict[str, Any], + ) -> List[StrategyRule]: + """Return strategies whose conditions match the context.""" + matched = [] + for strategy in strategies: + if not strategy.enabled: + continue + if not strategy.conditions: + # No conditions = always match + matched.append(strategy) + continue + if self._conditions_match(strategy.conditions, context): + matched.append(strategy) + return matched + + def _conditions_match( + self, + conditions: List[StrategyCondition], 
+ context: Dict[str, Any], + ) -> bool: + """All conditions must match (AND logic).""" + for cond in conditions: + field_value = str(context.get(cond.field, "")) + if not self._check_operator(cond.operator, cond.pattern, field_value): + return False + return True + + def _check_operator(self, op: str, pattern: str, value: str) -> bool: + """Apply operator check.""" + try: + if op == "regex_match": + return bool(_compile_pattern(pattern).search(value)) + elif op == "contains": + return pattern in value + elif op == "equals": + return pattern == value + elif op == "not_contains": + return pattern not in value + elif op == "starts_with": + return value.startswith(pattern) + elif op == "ends_with": + return value.endswith(pattern) + else: + return False + except re.error: + return False + + def format_hints(self, strategies: List[StrategyRule], max_chars: int = 0) -> str: + """Format matched strategies into a system hint string. + + Args: + max_chars: If > 0, truncate total output to this many characters. + """ + if not strategies: + return "" + + lines = ["[自我进化策略提示]"] + for s in strategies: + type_prefix = {"hint": "💡", "avoid": "⚠️", "prefer": "✅"}.get( + s.strategy_type, "💡" + ) + line = f"{type_prefix} {s.name}: {s.hint_text}" + if max_chars and len("\n".join(lines)) + len(line) > max_chars: + break + lines.append(line) + + return "\n".join(lines) diff --git a/self_evolution/strategy_compressor.py b/self_evolution/strategy_compressor.py new file mode 100644 index 0000000000..fcbd6b20b0 --- /dev/null +++ b/self_evolution/strategy_compressor.py @@ -0,0 +1,141 @@ +""" +Self Evolution Plugin — Strategy Compressor +============================================= + +Compresses and merges redundant strategy rules into concise hints. + +Called after dream consolidation to keep strategies.json compact. +Each hint_text must be ≤ 30 chars; strategies without conditions are +either merged into conditional rules or discarded. 
+""" + +from __future__ import annotations + +import logging +import re +from typing import Any, Dict, List + +logger = logging.getLogger(__name__) + +# Maximum allowed length for hint_text (characters) +MAX_HINT_LENGTH = 30 + +# Keyword clusters used to group similar strategies +_CLUSTERS: List[Dict[str, Any]] = [ + { + "keywords": ["bash", "路径", "path", "校验", "预检", "验证", "存在"], + "hint": "bash前先read验证路径", + "condition": {"field": "tool_name", "operator": "contains", "pattern": "bash"}, + }, + { + "keywords": ["api", "调试", "debug", "降级", "只读", "探查"], + "hint": "API失败时降级只读探查", + "condition": {"field": "task_type", "operator": "contains", "pattern": "api"}, + }, + { + "keywords": ["browser", "浏览器", "timeout", "超时", "网页"], + "hint": "浏览器操作设置超时保护", + "condition": {"field": "tool_name", "operator": "contains", "pattern": "browser"}, + }, + { + "keywords": ["重试", "retry", "浪费", "重复", "循环"], + "hint": "避免重复重试相同操作", + "condition": {}, + }, +] + + +def compress_strategies(rules: List[dict]) -> List[dict]: + """Compress strategy rules by merging similar ones. 
+ + Returns a new list of rules with: + - Duplicate hint_texts removed + - Similar rules merged into cluster summaries + - hint_text truncated to MAX_HINT_LENGTH + - Non-matching rules dropped if they have no conditions + """ + if not rules: + return [] + + # Deduplicate by hint_text + seen_hints: set[str] = set() + unique: list[dict] = [] + for r in rules: + key = r.get("hint_text", "").strip().lower() + if key and key not in seen_hints: + seen_hints.add(key) + unique.append(r) + + # Cluster similar rules + clustered = _cluster_rules(unique) + + # Enforce constraints: hint_text ≤ 30 chars, must have conditions + result: list[dict] = [] + for r in clustered: + hint = r.get("hint_text", "").strip() + conditions = r.get("conditions", []) + + # Skip rules without conditions (they won't be injected anyway) + if not conditions: + logger.debug("Dropping unconditioned strategy: %s", hint[:40]) + continue + + # Truncate hint if needed + if len(hint) > MAX_HINT_LENGTH: + hint = hint[:MAX_HINT_LENGTH] + r["hint_text"] = hint + + result.append(r) + + # Also keep any manual/default rules that already have conditions + for r in unique: + if r.get("source") in ("manual", "default") and r.get("conditions"): + if r not in result: + hint = r.get("hint_text", "").strip() + if len(hint) > MAX_HINT_LENGTH: + r["hint_text"] = hint[:MAX_HINT_LENGTH] + result.append(r) + + logger.info("Compressed strategies: %d → %d rules", len(rules), len(result)) + return result + + +def _cluster_rules(rules: list[dict]) -> list[dict]: + """Group rules by keyword clusters and merge each group into one rule.""" + matched_indices: set[int] = set() + merged: list[dict] = [] + + for cluster in _CLUSTERS: + group: list[dict] = [] + for i, r in enumerate(rules): + text = f"{r.get('name', '')} {r.get('hint_text', '')}".lower() + if any(kw in text for kw in cluster["keywords"]): + group.append(r) + matched_indices.add(i) + + if not group: + continue + + # Merge group into one rule + first = group[0] + 
condition = cluster.get("condition") + merged_rule = { + "id": first.get("id", ""), + "name": cluster["hint"], + "type": "learned", + "description": cluster["hint"], + "hint_text": cluster["hint"], + "conditions": [condition] if condition else [], + "severity": "medium", + "enabled": True, + "source": "learned", + "created_at": first.get("created_at", 0), + } + merged.append(merged_rule) + + # Add unmatched rules as-is + for i, r in enumerate(rules): + if i not in matched_indices: + merged.append(r) + + return merged diff --git a/self_evolution/strategy_injector.py b/self_evolution/strategy_injector.py new file mode 100644 index 0000000000..840fdfbd1f --- /dev/null +++ b/self_evolution/strategy_injector.py @@ -0,0 +1,124 @@ +""" +Self Evolution Plugin — Strategy Injector +=========================================== + +Injects learned strategy hints into sessions via pre_llm_call hook. + +Design reference: Claude Code plugins/learning-output-style/ + - SessionStart hook injects behavioral context automatically + - Equivalent to CLAUDE.md but more flexible and distributable + - No core modification needed +""" + +from __future__ import annotations + +import logging +import time +from typing import Any, Dict, Optional + +from self_evolution.models import StrategyRule +from self_evolution.rule_engine import StrategyRuleEngine + +logger = logging.getLogger(__name__) + +_engine = StrategyRuleEngine() + +# ── TTL-based cache to avoid reading strategies.json on every LLM call ──── + +_cached_strategies: list | None = None +_cache_ts: float = 0.0 +_CACHE_TTL: float = 60.0 # seconds + + +def _load_active_strategies() -> list: + """Load active strategies from strategy store (cached for _CACHE_TTL).""" + global _cached_strategies, _cache_ts + + now = time.time() + if _cached_strategies is not None and (now - _cache_ts) < _CACHE_TTL: + return _cached_strategies + + from self_evolution.strategy_store import StrategyStore + + store = StrategyStore() + data = store.load() + rules 
= data.get("rules", [])
+
+    strategies = []
+    for rule_data in rules:
+        if not rule_data.get("enabled", True):
+            continue
+        strategy = StrategyRule.from_dict(rule_data)
+        strategies.append(strategy)
+
+    _cached_strategies = strategies
+    _cache_ts = now
+    return strategies
+
+
+def invalidate_cache():
+    """Invalidate the strategy cache (call after strategy updates)."""
+    global _cached_strategies
+    _cached_strategies = None
+
+
+_MAX_INJECT_STRATEGIES = 3  # max number of strategies to inject
+_MAX_HINT_CHARS = 100  # total character budget for injected hints
+_MAX_SINGLE_HINT = 30  # max characters per single hint_text
+
+def inject_hints(kwargs: dict) -> Optional[str]:
+    """Pre-llm-call hook: inject learned strategy hints.
+
+    Rules:
+    - Strategies without conditions are skipped (must be condition-based).
+    - Hints whose hint_text exceeds _MAX_SINGLE_HINT chars are skipped.
+    - At most _MAX_INJECT_STRATEGIES hints, total ≤ _MAX_HINT_CHARS.
+    """
+    strategies = _load_active_strategies()
+    if not strategies:
+        return None
+
+    # Build context from current session
+    context = _build_context(kwargs)
+
+    # Match strategies
+    matched = _engine.match_strategies(strategies, context)
+    if not matched:
+        return None
+
+    # Filter: require conditions and enforce hint length
+    eligible = []
+    for s in matched:
+        if not s.conditions:
+            continue  # Skip unconditioned strategies
+        if len(s.hint_text.strip()) > _MAX_SINGLE_HINT:
+            continue  # Skip overly long hints
+        eligible.append(s)
+
+    if not eligible:
+        return None
+
+    # Deduplicate by hint_text content
+    seen_hints: set[str] = set()
+    unique: list = []
+    for s in eligible:
+        key = s.hint_text.strip().lower()
+        if key not in seen_hints:
+            seen_hints.add(key)
+            unique.append(s)
+
+    # Cap count
+    selected = unique[:_MAX_INJECT_STRATEGIES]
+
+    # Format hints within char budget
+    return _engine.format_hints(selected, max_chars=_MAX_HINT_CHARS)
+
+
+def _build_context(kwargs: dict) -> dict:
+    """Build matching context from hook kwargs."""
+    return {
+        "platform": kwargs.get("platform", ""),
+        "model": 
kwargs.get("model", ""), + "task_type": kwargs.get("task_type", ""), + "tool_name": kwargs.get("tool_name", ""), + } diff --git a/self_evolution/strategy_store.py b/self_evolution/strategy_store.py new file mode 100644 index 0000000000..5de8cab7c9 --- /dev/null +++ b/self_evolution/strategy_store.py @@ -0,0 +1,72 @@ +""" +Self Evolution Plugin — Strategy Store +======================================== + +Manages strategy rules with version history and rollback support. + +Strategies stored at ~/.hermes/self_evolution/strategies.json +Archives at ~/.hermes/self_evolution/archive/strategies_v{N}.json +""" + +from __future__ import annotations + +import json +import logging +from pathlib import Path +from typing import Any, Dict, Optional + +logger = logging.getLogger(__name__) + +from self_evolution.paths import DATA_DIR as STRATEGIES_DIR, STRATEGIES_FILE, ARCHIVE_DIR + + +class StrategyStore: + """Load, save, and version strategy rules.""" + + def load(self) -> dict: + """Load current strategies.""" + if not STRATEGIES_FILE.exists(): + return {"version": 0, "rules": []} + try: + return json.loads(STRATEGIES_FILE.read_text(encoding="utf-8")) + except (json.JSONDecodeError, OSError): + return {"version": 0, "rules": []} + + def save(self, data: dict): + """Save strategies to file.""" + STRATEGIES_DIR.mkdir(parents=True, exist_ok=True) + STRATEGIES_FILE.write_text( + json.dumps(data, ensure_ascii=False, indent=2), + encoding="utf-8", + ) + + def archive(self, version: int): + """Archive current strategies for rollback.""" + if not STRATEGIES_FILE.exists(): + return + ARCHIVE_DIR.mkdir(parents=True, exist_ok=True) + archive_path = ARCHIVE_DIR / f"strategies_v{version}.json" + archive_path.write_text( + STRATEGIES_FILE.read_text(encoding="utf-8"), + encoding="utf-8", + ) + logger.info("Archived strategies version %d", version) + + def load_archive(self, version: int) -> Optional[dict]: + """Load an archived version.""" + archive_path = ARCHIVE_DIR / 
f"strategies_v{version}.json" + if not archive_path.exists(): + return None + try: + return json.loads(archive_path.read_text(encoding="utf-8")) + except (json.JSONDecodeError, OSError): + return None + + def restore(self, data: dict): + """Restore strategies from an archive.""" + self.save(data) + logger.info("Restored strategies from archive") + + def get_version(self) -> int: + """Get current version number.""" + return self.load().get("version", 0) diff --git a/tests/test_self_evolution.py b/tests/test_self_evolution.py new file mode 100644 index 0000000000..4dc04ccc6a --- /dev/null +++ b/tests/test_self_evolution.py @@ -0,0 +1,1253 @@ +""" +Tests for the Self Evolution Plugin. + +Covers: + - quality_scorer: composite score computation + - models: dataclass serialization / deserialization + - db: SQLite CRUD operations (temp DB) + - hooks: telemetry collection + signal detection + - rule_engine: strategy condition matching + - strategy_store: file-based persistence + archive + - evolution_proposer: proposal generation + dedup + - evolution_executor: execute + tracking + rollback + - reflection_engine: JSON/text parsing of model output +""" + +from __future__ import annotations + +import json +import os +import sqlite3 +import tempfile +import time +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + + +# ============================================================================ +# Fixtures +# ============================================================================ + +@pytest.fixture(autouse=True) +def _tmp_evolution_db(tmp_path, monkeypatch): + """Redirect self_evolution DB to a temp directory for every test.""" + db_dir = tmp_path / ".hermes" / "self_evolution" + db_dir.mkdir(parents=True, exist_ok=True) + db_path = db_dir / "evolution.db" + + # Patch centralized paths module + import self_evolution.paths as paths_mod + monkeypatch.setattr(paths_mod, "DATA_DIR", db_dir) + monkeypatch.setattr(paths_mod, "DB_PATH", 
db_path) + monkeypatch.setattr(paths_mod, "STRATEGIES_FILE", db_dir / "strategies.json") + monkeypatch.setattr(paths_mod, "ARCHIVE_DIR", db_dir / "archive") + monkeypatch.setattr(paths_mod, "SKILLS_DIR", tmp_path / ".hermes" / "skills" / "learned") + monkeypatch.setattr(paths_mod, "MEMORIES_DIR", tmp_path / ".hermes" / "memories") + + # Also patch the imported names in db module + import self_evolution.db as db_mod + monkeypatch.setattr(db_mod, "DB_DIR", db_dir) + monkeypatch.setattr(db_mod, "DB_PATH", db_path) + + # Initialize schema + db_mod.init_db() + yield db_mod + # Clean up thread-local connection after each test + db_mod.close_connection() + + +@pytest.fixture +def sample_session_data(): + """Standard session data for quality scoring tests.""" + return { + "session_id": "test-session-001", + "completed": True, + "iterations": 5, + "tool_call_count": 5, + "message_count": 3, + "duration_seconds": 120, + "model": "test-model", + "platform": "test", + "tool_names": ["bash", "read", "write"], + } + + +# ============================================================================ +# 1. 
Quality Scorer +# ============================================================================ + +class TestQualityScorer: + """Test the composite quality score computation.""" + + def test_completed_session_high_score(self, sample_session_data): + from self_evolution.quality_scorer import compute_score + + score = compute_score(sample_session_data) + assert score.composite > 0.5, f"Completed session should score > 0.5, got {score.composite}" + assert score.completion_rate == 1.0 + assert score.task_category == "coding" + + def test_interrupted_session_medium_score(self, sample_session_data): + from self_evolution.quality_scorer import compute_score + + sample_session_data["completed"] = False + sample_session_data["interrupted"] = True + score = compute_score(sample_session_data) + assert score.completion_rate == 0.5 + + def test_partial_session(self, sample_session_data): + from self_evolution.quality_scorer import compute_score + + sample_session_data["completed"] = False + sample_session_data["partial"] = True + score = compute_score(sample_session_data) + assert score.completion_rate == 0.3 + + def test_efficiency_degrades_with_iterations(self, sample_session_data): + from self_evolution.quality_scorer import compute_score + + # Low iterations => high efficiency + sample_session_data["iterations"] = 2 + score_low = compute_score(sample_session_data) + + # High iterations => low efficiency + sample_session_data["iterations"] = 50 + score_high = compute_score(sample_session_data) + + assert score_low.efficiency_score > score_high.efficiency_score + + def test_budget_exhaustion_lowers_satisfaction(self, sample_session_data): + from self_evolution.quality_scorer import compute_score + + sample_session_data["max_iterations"] = 5 + sample_session_data["iterations"] = 5 # exactly at limit + score = compute_score(sample_session_data) + assert score.satisfaction_proxy < 0.7 # below baseline + + def test_single_turn_completion_high_satisfaction(self, 
sample_session_data): + from self_evolution.quality_scorer import compute_score + + sample_session_data["message_count"] = 2 + sample_session_data["completed"] = True + score = compute_score(sample_session_data) + assert score.satisfaction_proxy == 0.9 + + def test_task_category_coding(self, sample_session_data): + from self_evolution.quality_scorer import compute_score + + sample_session_data["tool_names"] = ["bash", "write"] + score = compute_score(sample_session_data) + assert score.task_category == "coding" + + def test_task_category_web_research(self, sample_session_data): + from self_evolution.quality_scorer import compute_score + + sample_session_data["tool_names"] = ["web_search", "browser"] + score = compute_score(sample_session_data) + assert score.task_category == "web_research" + + def test_task_category_file_analysis(self, sample_session_data): + from self_evolution.quality_scorer import compute_score + + sample_session_data["tool_names"] = ["read", "grep", "glob"] + score = compute_score(sample_session_data) + assert score.task_category == "file_analysis" + + def test_task_category_general(self, sample_session_data): + from self_evolution.quality_scorer import compute_score + + sample_session_data["tool_names"] = [] + score = compute_score(sample_session_data) + assert score.task_category == "general" + + def test_tool_names_as_string(self, sample_session_data): + from self_evolution.quality_scorer import compute_score + + sample_session_data["tool_names"] = "bash,read,write" + score = compute_score(sample_session_data) + assert score.task_category == "coding" + + def test_composite_weighted_sum(self, sample_session_data): + """Verify composite = 0.4*completion + 0.2*efficiency + 0.15*cost + 0.25*satisfaction.""" + from self_evolution.quality_scorer import compute_score + + score = compute_score(sample_session_data) + expected = ( + 0.40 * score.completion_rate + + 0.20 * score.efficiency_score + + 0.15 * score.cost_efficiency + + 0.25 * 
score.satisfaction_proxy + ) + assert abs(score.composite - round(expected, 3)) < 0.001 + + +# ============================================================================ +# 2. Models — Serialization +# ============================================================================ + +class TestModels: + """Test data model serialization and deserialization.""" + + def test_quality_score_to_db_row(self): + from self_evolution.models import QualityScore + + qs = QualityScore( + session_id="s1", + composite=0.85, + completion_rate=1.0, + efficiency_score=0.7, + cost_efficiency=0.9, + satisfaction_proxy=0.8, + task_category="coding", + model="test", + ) + row = qs.to_db_row() + assert row["session_id"] == "s1" + assert row["composite_score"] == 0.85 + assert row["task_category"] == "coding" + + def test_reflection_report_to_db_row(self): + from self_evolution.models import ReflectionReport + + report = ReflectionReport( + period_start=1000.0, + period_end=2000.0, + sessions_analyzed=5, + avg_score=0.75, + worst_patterns=["pattern1", "pattern2"], + best_patterns=["good1"], + recommendations=["rec1"], + ) + row = report.to_db_row() + assert row["sessions_analyzed"] == 5 + assert json.loads(row["worst_patterns"]) == ["pattern1", "pattern2"] + assert json.loads(row["best_patterns"]) == ["good1"] + + def test_proposal_to_db_row(self): + from self_evolution.models import Proposal + + p = Proposal( + id="prop-001", + proposal_type="strategy", + title="Test Proposal", + description="A test proposal", + risk_assessment="low", + ) + row = p.to_db_row() + assert row["id"] == "prop-001" + assert row["proposal_type"] == "strategy" + assert row["status"] == "pending_approval" + + def test_improvement_unit_should_revert(self): + from self_evolution.models import ImprovementUnit + + unit = ImprovementUnit( + id="u1", + proposal_id="p1", + change_type="strategy", + baseline_score=0.8, + current_score=0.6, + sessions_sampled=5, + max_regression=0.10, + ) + # Regression = 0.2 > 
max_regression 0.10 => should revert + assert unit.should_revert is True + + def test_improvement_unit_should_not_revert(self): + from self_evolution.models import ImprovementUnit + + unit = ImprovementUnit( + id="u2", + proposal_id="p2", + change_type="strategy", + baseline_score=0.8, + current_score=0.75, + sessions_sampled=5, + max_regression=0.10, + ) + # Regression = 0.05 < max_regression 0.10 => should NOT revert + assert unit.should_revert is False + + def test_improvement_unit_should_promote(self): + from self_evolution.models import ImprovementUnit + + unit = ImprovementUnit( + id="u3", + proposal_id="p3", + change_type="strategy", + baseline_score=0.7, + current_score=0.8, + sessions_sampled=15, + min_sessions=10, + min_improvement=0.05, + ) + # Improvement = 0.1 >= min_improvement 0.05 and sessions >= min_sessions + assert unit.should_promote is True + + def test_improvement_unit_should_not_promote_too_few_sessions(self): + from self_evolution.models import ImprovementUnit + + unit = ImprovementUnit( + id="u4", + proposal_id="p4", + change_type="strategy", + baseline_score=0.7, + current_score=0.9, + sessions_sampled=5, + min_sessions=10, + min_improvement=0.05, + ) + assert unit.should_promote is False + + def test_strategy_rule_roundtrip(self): + from self_evolution.models import StrategyRule, StrategyCondition + + rule = StrategyRule( + id="sr1", + name="Avoid large file reads", + strategy_type="avoid", + description="Don't read files > 1MB", + conditions=[ + StrategyCondition(field="tool_name", operator="equals", pattern="read"), + ], + hint_text="Use grep instead", + severity="high", + ) + d = rule.to_dict() + restored = StrategyRule.from_dict(d) + assert restored.id == "sr1" + assert restored.strategy_type == "avoid" + assert len(restored.conditions) == 1 + assert restored.conditions[0].field == "tool_name" + + def test_error_analysis_summary(self): + from self_evolution.models import ErrorAnalysis, ToolFailure + + ea = ErrorAnalysis( + 
tool_failures=[ + ToolFailure(tool_name="bash", error_type="timeout", count=3), + ], + retry_patterns=[], + incomplete_sessions=["s1"], + user_corrections=2, + ) + summary = ea.summary() + assert "bash" in summary + assert "3" in summary + assert "未完成" in summary + assert "纠正" in summary + + def test_waste_analysis_summary(self): + from self_evolution.models import WasteAnalysis, ToolDuration + + wa = WasteAnalysis( + slowest_tools=[ + ToolDuration(tool_name="bash", total_duration_ms=5000, call_count=5, avg_duration_ms=1000), + ], + ) + summary = wa.summary() + assert "bash" in summary + assert "1000" in summary + + def test_code_change_analysis_summary_empty(self): + from self_evolution.models import CodeChangeAnalysis + + cca = CodeChangeAnalysis() + assert cca.summary() == "代码更新: 无新提交" + + def test_code_change_analysis_summary_with_commits(self): + from self_evolution.models import CodeChangeAnalysis, CommitInfo + + cca = CodeChangeAnalysis( + commits=[ + CommitInfo(hash_short="abc1234", subject="fix: bug", insertions=10, deletions=5), + ], + total_commits=1, + total_insertions=10, + total_deletions=5, + total_files_changed=2, + ) + summary = cca.summary() + assert "abc1234" in summary + assert "+10" in summary + + +# ============================================================================ +# 3. 
Database CRUD +# ============================================================================ + +class TestDatabase: + """Test SQLite CRUD operations.""" + + def test_init_db_creates_tables(self, _tmp_evolution_db): + conn = _tmp_evolution_db.get_connection() + tables = conn.execute( + "SELECT name FROM sqlite_master WHERE type='table'" + ).fetchall() + table_names = {t["name"] for t in tables} + assert "tool_invocations" in table_names + assert "session_scores" in table_names + assert "evolution_proposals" in table_names + assert "improvement_units" in table_names + assert "strategy_versions" in table_names + conn.close() + + def test_insert_and_fetch(self, _tmp_evolution_db): + rowid = _tmp_evolution_db.insert("session_scores", { + "session_id": "s-test", + "composite_score": 0.85, + "completion_rate": 1.0, + "efficiency_score": 0.7, + "cost_efficiency": 0.9, + "satisfaction_proxy": 0.8, + "task_category": "coding", + "model": "test", + }) + assert rowid > 0 + + row = _tmp_evolution_db.fetch_one("session_scores", where="session_id = ?", params=("s-test",)) + assert row is not None + assert row["composite_score"] == 0.85 + + def test_insert_many(self, _tmp_evolution_db): + rows = [ + {"session_id": f"s-{i}", "composite_score": 0.5, "completion_rate": 1.0, + "efficiency_score": 0.5, "cost_efficiency": 0.5, "satisfaction_proxy": 0.5, + "task_category": "general", "model": "test"} + for i in range(3) + ] + _tmp_evolution_db.insert_many("session_scores", rows) + all_rows = _tmp_evolution_db.fetch_all("session_scores") + assert len(all_rows) == 3 + + def test_update(self, _tmp_evolution_db): + _tmp_evolution_db.insert("session_scores", { + "session_id": "s-upd", "composite_score": 0.5, + "completion_rate": 1.0, "efficiency_score": 0.5, + "cost_efficiency": 0.5, "satisfaction_proxy": 0.5, + "task_category": "general", "model": "test", + }) + _tmp_evolution_db.update( + "session_scores", + {"composite_score": 0.95}, + where="session_id = ?", + where_params=("s-upd",), + 
) + row = _tmp_evolution_db.fetch_one("session_scores", where="session_id = ?", params=("s-upd",)) + assert row["composite_score"] == 0.95 + + def test_fetch_all_with_order_and_limit(self, _tmp_evolution_db): + for i in range(5): + _tmp_evolution_db.insert("tool_invocations", { + "session_id": f"s-{i}", + "tool_name": "bash", + "duration_ms": i * 100, + "success": True, + "turn_number": i, + }) + rows = _tmp_evolution_db.fetch_all( + "tool_invocations", + where="tool_name = ?", + params=("bash",), + order_by="duration_ms DESC", + limit=3, + ) + assert len(rows) == 3 + assert rows[0]["duration_ms"] == 400 + + def test_query(self, _tmp_evolution_db): + _tmp_evolution_db.insert("session_scores", { + "session_id": "s-q", "composite_score": 0.7, + "completion_rate": 1.0, "efficiency_score": 0.5, + "cost_efficiency": 0.5, "satisfaction_proxy": 0.5, + "task_category": "general", "model": "test", + }) + results = _tmp_evolution_db.query("SELECT COUNT(*) as cnt FROM session_scores") + assert results[0]["cnt"] == 1 + + def test_cleanup(self, _tmp_evolution_db): + old_ts = time.time() - 31 * 86400 # 31 days ago + _tmp_evolution_db.insert("tool_invocations", { + "session_id": "s-old", "tool_name": "bash", + "duration_ms": 100, "success": True, "turn_number": 0, + "created_at": old_ts, + }) + _tmp_evolution_db.insert("tool_invocations", { + "session_id": "s-new", "tool_name": "bash", + "duration_ms": 100, "success": True, "turn_number": 0, + }) + _tmp_evolution_db.cleanup(days=30) + remaining = _tmp_evolution_db.fetch_all("tool_invocations") + assert len(remaining) == 1 + assert remaining[0]["session_id"] == "s-new" + + +# ============================================================================ +# 4. 
Hooks — Telemetry + Signal Detection +# ============================================================================ + +class TestHooks: + """Test lifecycle hook functions.""" + + def test_on_tool_call_inserts_telemetry(self, _tmp_evolution_db): + from self_evolution.hooks import on_tool_call + + on_tool_call( + tool_name="bash", + started_at=time.time(), + duration_ms=500, + success=True, + session_id="s-hook-1", + turn_number=3, + ) + rows = _tmp_evolution_db.fetch_all("tool_invocations") + assert len(rows) == 1 + assert rows[0]["tool_name"] == "bash" + assert rows[0]["duration_ms"] == 500 + + def test_on_tool_call_failure(self, _tmp_evolution_db): + from self_evolution.hooks import on_tool_call + + on_tool_call( + tool_name="write", + success=False, + error_type="PermissionError", + session_id="s-hook-2", + ) + rows = _tmp_evolution_db.fetch_all("tool_invocations") + assert rows[0]["success"] is False or rows[0]["success"] == 0 + assert rows[0]["error_type"] == "PermissionError" + + def test_on_session_end_computes_score(self, _tmp_evolution_db): + from self_evolution.hooks import on_session_end + + on_session_end(session_data={ + "session_id": "s-end-1", + "completed": True, + "iterations": 3, + "tool_call_count": 3, + "message_count": 2, + "tool_names": ["bash"], + }) + row = _tmp_evolution_db.fetch_one("session_scores", where="session_id = ?", params=("s-end-1",)) + assert row is not None + assert row["composite_score"] > 0 + + def test_on_session_end_no_session_id(self, _tmp_evolution_db): + from self_evolution.hooks import on_session_end + + # Should not crash, should not insert anything + on_session_end(session_data={}) + rows = _tmp_evolution_db.fetch_all("session_scores") + assert len(rows) == 0 + + def test_correction_signal_detected(self, _tmp_evolution_db): + from self_evolution.hooks import on_session_end + + on_session_end(session_data={ + "session_id": "s-corr-1", + "completed": True, + "iterations": 5, + "tool_call_count": 5, + "message_count": 3, 
+ "messages": [ + {"role": "assistant", "content": "Done"}, + {"role": "user", "content": "不对,这不是我想要的"}, + ], + }) + signals = _tmp_evolution_db.fetch_all( + "outcome_signals", + where="session_id = ? AND signal_type = ?", + params=("s-corr-1", "correction"), + ) + assert len(signals) == 1 + + def test_frustration_signal_detected(self, _tmp_evolution_db): + from self_evolution.hooks import on_session_end + + on_session_end(session_data={ + "session_id": "s-frust-1", + "completed": True, + "iterations": 5, + "tool_call_count": 5, + "message_count": 3, + "messages": [ + {"role": "assistant", "content": "Done"}, + {"role": "user", "content": "太慢了,浪费时间"}, + ], + }) + signals = _tmp_evolution_db.fetch_all( + "outcome_signals", + where="session_id = ? AND signal_type = ?", + params=("s-frust-1", "frustration"), + ) + assert len(signals) == 1 + + def test_budget_exhausted_signal(self, _tmp_evolution_db): + from self_evolution.hooks import on_session_end + + on_session_end(session_data={ + "session_id": "s-budget-1", + "completed": False, + "interrupted": False, + "iterations": 20, + "max_iterations": 20, + "tool_call_count": 20, + "message_count": 10, + }) + signals = _tmp_evolution_db.fetch_all( + "outcome_signals", + where="session_id = ? AND signal_type = ?", + params=("s-budget-1", "budget_exhausted"), + ) + assert len(signals) == 1 + + +# ============================================================================ +# 5. 
Rule Engine — Strategy Matching +# ============================================================================ + +class TestRuleEngine: + """Test conditional strategy matching.""" + + def _make_rule(self, strategy_type="hint", conditions=None, enabled=True): + from self_evolution.models import StrategyRule, StrategyCondition + + return StrategyRule( + id="r1", + name="Test Rule", + strategy_type=strategy_type, + description="desc", + conditions=conditions or [], + hint_text="test hint", + enabled=enabled, + ) + + def test_always_match_no_conditions(self): + from self_evolution.rule_engine import StrategyRuleEngine + + engine = StrategyRuleEngine() + rule = self._make_rule() + matched = engine.match_strategies([rule], {}) + assert len(matched) == 1 + + def test_disabled_rule_not_matched(self): + from self_evolution.rule_engine import StrategyRuleEngine + + engine = StrategyRuleEngine() + rule = self._make_rule(enabled=False) + matched = engine.match_strategies([rule], {}) + assert len(matched) == 0 + + def test_equals_operator(self): + from self_evolution.rule_engine import StrategyRuleEngine + from self_evolution.models import StrategyCondition + + engine = StrategyRuleEngine() + rule = self._make_rule(conditions=[ + StrategyCondition(field="tool_name", operator="equals", pattern="bash"), + ]) + assert len(engine.match_strategies([rule], {"tool_name": "bash"})) == 1 + assert len(engine.match_strategies([rule], {"tool_name": "read"})) == 0 + + def test_contains_operator(self): + from self_evolution.rule_engine import StrategyRuleEngine + from self_evolution.models import StrategyCondition + + engine = StrategyRuleEngine() + rule = self._make_rule(conditions=[ + StrategyCondition(field="task_type", operator="contains", pattern="debug"), + ]) + assert len(engine.match_strategies([rule], {"task_type": "debug python code"})) == 1 + assert len(engine.match_strategies([rule], {"task_type": "write tests"})) == 0 + + def test_regex_match_operator(self): + from 
self_evolution.rule_engine import StrategyRuleEngine + from self_evolution.models import StrategyCondition + + engine = StrategyRuleEngine() + rule = self._make_rule(conditions=[ + StrategyCondition(field="platform", operator="regex_match", pattern="feishu|slack"), + ]) + assert len(engine.match_strategies([rule], {"platform": "feishu"})) == 1 + assert len(engine.match_strategies([rule], {"platform": "discord"})) == 0 + + def test_not_contains_operator(self): + from self_evolution.rule_engine import StrategyRuleEngine + from self_evolution.models import StrategyCondition + + engine = StrategyRuleEngine() + rule = self._make_rule(conditions=[ + StrategyCondition(field="model", operator="not_contains", pattern="mini"), + ]) + assert len(engine.match_strategies([rule], {"model": "gpt-4"})) == 1 + assert len(engine.match_strategies([rule], {"model": "gpt-4-mini"})) == 0 + + def test_starts_with_operator(self): + from self_evolution.rule_engine import StrategyRuleEngine + from self_evolution.models import StrategyCondition + + engine = StrategyRuleEngine() + rule = self._make_rule(conditions=[ + StrategyCondition(field="platform", operator="starts_with", pattern="feishu"), + ]) + assert len(engine.match_strategies([rule], {"platform": "feishu_web"})) == 1 + assert len(engine.match_strategies([rule], {"platform": "web_feishu"})) == 0 + + def test_and_logic_all_conditions_must_match(self): + from self_evolution.rule_engine import StrategyRuleEngine + from self_evolution.models import StrategyCondition + + engine = StrategyRuleEngine() + rule = self._make_rule(conditions=[ + StrategyCondition(field="platform", operator="equals", pattern="feishu"), + StrategyCondition(field="task_type", operator="contains", pattern="code"), + ]) + # Both match + assert len(engine.match_strategies([rule], {"platform": "feishu", "task_type": "code review"})) == 1 + # Only one matches + assert len(engine.match_strategies([rule], {"platform": "feishu", "task_type": "chat"})) == 0 + + def 
test_format_hints(self): + from self_evolution.rule_engine import StrategyRuleEngine + + engine = StrategyRuleEngine() + rule = self._make_rule(strategy_type="avoid", conditions=[]) + hint = engine.format_hints([rule]) + assert "[自我进化策略提示]" in hint + assert "Test Rule" in hint + + +# ============================================================================ +# 6. Strategy Store +# ============================================================================ + +class TestStrategyStore: + """Test strategy persistence with versioning.""" + + def test_load_empty(self, tmp_path, monkeypatch): + from self_evolution.strategy_store import StrategyStore + + store = StrategyStore() + monkeypatch.setattr( + "self_evolution.strategy_store.STRATEGIES_FILE", + tmp_path / "strategies.json", + ) + monkeypatch.setattr( + "self_evolution.strategy_store.ARCHIVE_DIR", + tmp_path / "archive", + ) + data = store.load() + assert data["version"] == 0 + assert data["rules"] == [] + + def test_save_and_load_roundtrip(self, tmp_path, monkeypatch): + from self_evolution.strategy_store import StrategyStore + + store = StrategyStore() + sf = tmp_path / "strategies.json" + ad = tmp_path / "archive" + monkeypatch.setattr("self_evolution.paths.STRATEGIES_FILE", sf) + monkeypatch.setattr("self_evolution.paths.ARCHIVE_DIR", ad) + monkeypatch.setattr("self_evolution.strategy_store.STRATEGIES_FILE", sf) + monkeypatch.setattr("self_evolution.strategy_store.ARCHIVE_DIR", ad) + + data = {"version": 1, "rules": [{"id": "r1", "name": "Rule 1"}]} + store.save(data) + + loaded = store.load() + assert loaded["version"] == 1 + assert len(loaded["rules"]) == 1 + + def test_archive_and_restore(self, tmp_path, monkeypatch): + from self_evolution.strategy_store import StrategyStore + + store = StrategyStore() + sf = tmp_path / "strategies.json" + ad = tmp_path / "archive" + monkeypatch.setattr("self_evolution.paths.STRATEGIES_FILE", sf) + monkeypatch.setattr("self_evolution.paths.ARCHIVE_DIR", ad) + 
monkeypatch.setattr("self_evolution.strategy_store.STRATEGIES_FILE", sf) + monkeypatch.setattr("self_evolution.strategy_store.ARCHIVE_DIR", ad) + + data_v1 = {"version": 1, "rules": [{"id": "r1"}]} + store.save(data_v1) + store.archive(1) + + # Overwrite with v2 + data_v2 = {"version": 2, "rules": [{"id": "r2"}]} + store.save(data_v2) + + # Restore v1 + archive = store.load_archive(1) + assert archive["version"] == 1 + assert archive["rules"][0]["id"] == "r1" + + def test_load_nonexistent_archive(self, tmp_path, monkeypatch): + from self_evolution.strategy_store import StrategyStore + + store = StrategyStore() + monkeypatch.setattr("self_evolution.paths.ARCHIVE_DIR", tmp_path / "archive") + monkeypatch.setattr( + "self_evolution.strategy_store.ARCHIVE_DIR", + tmp_path / "archive", + ) + assert store.load_archive(999) is None + + +# ============================================================================ +# 7. Evolution Proposer +# ============================================================================ + +class TestEvolutionProposer: + """Test proposal generation from reflection reports.""" + + def _make_report(self, worst=None, best=None, recs=None, sessions=10): + from self_evolution.models import ReflectionReport + + return ReflectionReport( + period_start=1000.0, + period_end=2000.0, + sessions_analyzed=sessions, + worst_patterns=worst or ["bash timeout frequently"], + best_patterns=best or ["single-turn code generation works well"], + recommendations=recs or ["创建新的工具偏好来优化bash使用"], + ) + + def test_generates_proposals_from_report(self): + from self_evolution.evolution_proposer import generate_proposals + + report = self._make_report() + proposals = generate_proposals(report, report_id=1) + assert len(proposals) > 0 + + def test_error_pattern_creates_code_improvement_proposal(self): + from self_evolution.evolution_proposer import generate_proposals + + report = self._make_report(worst=["tool failure pattern"]) + proposals = generate_proposals(report, 
report_id=1) + code_proposals = [p for p in proposals if p.proposal_type == "code_improvement"] + assert len(code_proposals) > 0 + # Verify structured description + desc = code_proposals[0].description + assert "问题描述" in desc + assert "建议方向" in desc + + def test_success_pattern_creates_skill_proposal(self): + from self_evolution.evolution_proposer import generate_proposals + + # Report with enough sessions to pass the ≥5 threshold + report = self._make_report( + best=["efficient workflow discovered"], + sessions=10, + ) + proposals = generate_proposals(report, report_id=1) + skill_proposals = [p for p in proposals if p.proposal_type == "skill"] + assert len(skill_proposals) > 0 + + def test_success_pattern_skipped_below_threshold(self): + """Skill proposals should not be generated from best_patterns with <5 sessions.""" + from self_evolution.evolution_proposer import generate_proposals + + report = self._make_report( + best=["efficient workflow discovered"], + recs=[], # No recommendations that might create skill proposals + sessions=2, # Below threshold + ) + proposals = generate_proposals(report, report_id=1) + skill_from_best = [ + p for p in proposals + if p.proposal_type == "skill" and p.id.startswith("prop-success-") + ] + assert len(skill_from_best) == 0 + + def test_recommendation_type_detection(self): + from self_evolution.evolution_proposer import generate_proposals + + report = self._make_report(recs=["更新记忆来记住这个发现"]) + proposals = generate_proposals(report, report_id=1) + memory_proposals = [p for p in proposals if p.proposal_type == "memory"] + assert len(memory_proposals) > 0 + + def test_deduplication(self): + from self_evolution.evolution_proposer import generate_proposals + + report = self._make_report( + worst=["same pattern", "same pattern"], # duplicate + ) + proposals = generate_proposals(report, report_id=1) + titles = [p.title for p in proposals] + assert len(titles) == len(set(titles)), "Should deduplicate similar titles" + + def 
test_max_five_proposals(self):
+        from self_evolution.evolution_proposer import generate_proposals

+        report = self._make_report(
+            worst=[f"pattern {i}" for i in range(10)],
+            best=[f"best {i}" for i in range(10)],
+            recs=[f"rec {i}" for i in range(10)],
+        )
+        proposals = generate_proposals(report, report_id=1)
+        assert len(proposals) <= 5


+# ============================================================================
+# 8. Evolution Executor
+# ============================================================================

+class TestEvolutionExecutor:
+    """Test execution of approved proposals."""

+    def test_execute_strategy_proposal(self, _tmp_evolution_db, tmp_path, monkeypatch):
+        from self_evolution.evolution_executor import EvolutionExecutor
+        from self_evolution.models import Proposal

+        monkeypatch.setattr(
+            "self_evolution.evolution_executor.STRATEGIES_DIR", tmp_path,
+        )
+        monkeypatch.setattr(
+            "self_evolution.evolution_executor.STRATEGIES_FILE",
+            tmp_path / "strategies.json",
+        )
+        monkeypatch.setattr(
+            "self_evolution.evolution_executor.ARCHIVE_DIR",
+            tmp_path / "archive",
+        )
+        monkeypatch.setattr(
+            "self_evolution.strategy_store.STRATEGIES_DIR", tmp_path,
+        )
+        monkeypatch.setattr(
+            "self_evolution.strategy_store.STRATEGIES_FILE",
+            tmp_path / "strategies.json",
+        )
+        monkeypatch.setattr(
+            "self_evolution.strategy_store.ARCHIVE_DIR",
+            tmp_path / "archive",
+        )

+        proposal = Proposal(
+            id="prop-exec-1",
+            proposal_type="strategy",
+            title="Test Strategy",
+            description="Avoid large file reads",
+            status="approved",
+        )
+        executor = EvolutionExecutor()
+        executor.execute(proposal)

+        # The proposal was built in-memory and never inserted into the DB,
+        # so there is no status row to check; verify the strategy file instead.
+        from self_evolution.strategy_store import StrategyStore
+        store = StrategyStore()
+        data = store.load()
+        assert data["version"] >= 1
+        assert any(r["id"] == "prop-exec-1" for r in 
data["rules"])

+    def test_execute_skill_proposal(self, _tmp_evolution_db, tmp_path, monkeypatch):
+        from self_evolution.evolution_executor import EvolutionExecutor
+        from self_evolution.models import Proposal

+        # Path.home() is patched to tmp_path, so learned skills land here
+        skills_dir = tmp_path / ".hermes" / "skills" / "learned"
+        monkeypatch.setattr("pathlib.Path.home", lambda: tmp_path)

+        proposal = Proposal(
+            id="prop-skill-1",
+            proposal_type="skill",
+            title="Test Skill",
+            description="A learned skill for testing",
+            status="approved",
+        )
+        executor = EvolutionExecutor()
+        executor.execute(proposal)

+        skill_file = skills_dir / "prop-skill-1" / "SKILL.md"
+        assert skill_file.exists()
+        content = skill_file.read_text()
+        assert "Test Skill" in content

+    def test_execute_memory_proposal(self, _tmp_evolution_db, tmp_path, monkeypatch):
+        from self_evolution.evolution_executor import EvolutionExecutor
+        from self_evolution.models import Proposal

+        memories_dir = tmp_path / ".hermes" / "memories"
+        monkeypatch.setattr("pathlib.Path.home", lambda: tmp_path)

+        proposal = Proposal(
+            id="prop-mem-1",
+            proposal_type="memory",
+            title="Remember Pattern",
+            description="Always use context managers for file operations",
+            status="approved",
+        )
+        executor = EvolutionExecutor()
+        executor.execute(proposal)

+        perf_file = memories_dir / "PERFORMANCE.md"
+        assert perf_file.exists()
+        content = perf_file.read_text()
+        assert "context managers" in content

+    def test_execute_tool_preference_proposal(self, _tmp_evolution_db, tmp_path, monkeypatch):
+        from self_evolution.evolution_executor import EvolutionExecutor
+        from self_evolution.models import Proposal

+        evo_dir = tmp_path / "self_evolution"
+        evo_dir.mkdir(parents=True, exist_ok=True)
+        monkeypatch.setattr("self_evolution.paths.DATA_DIR", evo_dir)
+        monkeypatch.setattr("self_evolution.evolution_executor.STRATEGIES_DIR", evo_dir)

+        proposal = Proposal(
+            id="prop-tool-1",
+            proposal_type="tool_preference",
+            title="Prefer grep over find",
+            
description="Use grep instead of find for searching", + expected_impact="faster searches", + status="approved", + ) + executor = EvolutionExecutor() + executor.execute(proposal) + + prefs_file = evo_dir / "tool_preferences.json" + assert prefs_file.exists() + prefs = json.loads(prefs_file.read_text()) + assert "prop-tool-1" in prefs + + +# ============================================================================ +# 9. Reflection Engine — Parsing +# ============================================================================ + +class TestReflectionEngine: + """Test reflection report parsing from model output.""" + + def _make_engine(self): + from self_evolution.reflection_engine import DreamEngine + return DreamEngine(config={"base_url": "", "model": ""}) + + def test_parse_valid_json(self): + engine = self._make_engine() + text = json.dumps({ + "worst_patterns": ["bash timeouts", "repeated reads"], + "best_patterns": ["single-turn success"], + "recommendations": ["add retry logic"], + "tool_insights": {"bash": {"sr": 0.9}}, + }) + report = engine._parse_reflection( + text, 1000.0, 2000.0, 5, 0.75, + error_analysis=MagicMock(summary=lambda: ""), + waste_analysis=MagicMock(summary=lambda: ""), + ) + assert len(report.worst_patterns) == 2 + assert len(report.best_patterns) == 1 + assert len(report.recommendations) == 1 + + def test_parse_json_in_markdown_wrapper(self): + engine = self._make_engine() + text = '```json\n{"worst_patterns": ["p1"], "best_patterns": [], "recommendations": []}\n```' + report = engine._parse_reflection( + text, 1000.0, 2000.0, 1, 0.5, + error_analysis=MagicMock(summary=lambda: ""), + waste_analysis=MagicMock(summary=lambda: ""), + ) + assert report.worst_patterns == ["p1"] + + def test_parse_text_sections(self): + engine = self._make_engine() + text = """Here is my analysis: + +worst patterns: +- Too many retries +- Slow file operations + +best patterns: +- Direct code generation + +recommendations: +- Cache tool results +- Optimize file 
reads +""" + report = engine._parse_reflection( + text, 1000.0, 2000.0, 1, 0.5, + error_analysis=MagicMock(summary=lambda: ""), + waste_analysis=MagicMock(summary=lambda: ""), + ) + assert len(report.worst_patterns) >= 1 + assert len(report.best_patterns) >= 1 + assert len(report.recommendations) >= 1 + + def test_parse_numbered_list(self): + engine = self._make_engine() + text = """分析结果: + +worst patterns: +1) Bash command timeouts +2) Repeated tool calls + +recommendations: +1) Add timeout handling +""" + report = engine._parse_reflection( + text, 1000.0, 2000.0, 1, 0.5, + error_analysis=MagicMock(summary=lambda: ""), + waste_analysis=MagicMock(summary=lambda: ""), + ) + assert len(report.worst_patterns) >= 1 + + def test_parse_empty_text(self): + engine = self._make_engine() + report = engine._parse_reflection( + "", 1000.0, 2000.0, 0, 0.0, + error_analysis=MagicMock(summary=lambda: ""), + waste_analysis=MagicMock(summary=lambda: ""), + ) + assert report.worst_patterns == [] + assert report.best_patterns == [] + assert report.recommendations == [] + + +# ============================================================================ +# 10. Integration — End-to-End Flow +# ============================================================================ + +class TestEndToEndFlow: + """Test the full self-evolution cycle with mocked LLM calls.""" + + def test_full_cycle_no_model(self, _tmp_evolution_db, tmp_path, monkeypatch): + """Simulate the full cycle: hooks → data → analysis (without LLM call).""" + from self_evolution.hooks import on_tool_call, on_session_end + from self_evolution.reflection_engine import DreamEngine + + # 1. Simulate tool calls + for i in range(5): + on_tool_call( + tool_name="bash", + duration_ms=200 + i * 100, + success=(i < 4), # last one fails + error_type="timeout" if i == 4 else None, + session_id="s-e2e-1", + turn_number=i, + ) + + # 2. 
Simulate session end
+        on_session_end(session_data={
+            "session_id": "s-e2e-1",
+            "completed": True,
+            "iterations": 5,
+            "tool_call_count": 5,
+            "message_count": 2,
+            "tool_names": ["bash"],
+            "model": "test",
+        })

+        # 3. Verify data was collected
+        invocations = _tmp_evolution_db.fetch_all("tool_invocations")
+        assert len(invocations) == 5

+        scores = _tmp_evolution_db.fetch_all("session_scores")
+        assert len(scores) == 1

+        # 4. Run error analysis directly (no LLM), reusing the rows fetched above
+        engine = DreamEngine(config={"base_url": "", "model": ""})
+        signals = _tmp_evolution_db.fetch_all("outcome_signals")

+        error_analysis = engine._analyze_errors(scores, invocations, signals)
+        assert len(error_analysis.tool_failures) == 1
+        assert error_analysis.tool_failures[0].tool_name == "bash"
+        assert error_analysis.tool_failures[0].count == 1

+        # 5. Time waste analysis
+        waste_analysis = engine._analyze_time_waste(scores, invocations)
+        assert len(waste_analysis.slowest_tools) > 0

+    def test_reflection_prompt_builds(self, _tmp_evolution_db):
+        """Verify the reflection prompt is well-formed."""
+        from self_evolution.reflection_engine import DreamEngine

+        engine = DreamEngine(config={"base_url": "", "model": ""})

+        # Insert mock data
+        _tmp_evolution_db.insert("session_scores", {
+            "session_id": "s1", "composite_score": 0.8,
+            "completion_rate": 1.0, "efficiency_score": 0.7,
+            "cost_efficiency": 0.9, "satisfaction_proxy": 0.8,
+            "task_category": "coding", "model": "test",
+        })
+        _tmp_evolution_db.insert("tool_invocations", {
+            "session_id": "s1", "tool_name": "bash",
+            "duration_ms": 500, "success": True, "turn_number": 1,
+        })

+        scores = _tmp_evolution_db.fetch_all("session_scores")
+        invocations = _tmp_evolution_db.fetch_all("tool_invocations")
+        signals = _tmp_evolution_db.fetch_all("outcome_signals")

+        error_analysis = engine._analyze_errors(scores, 
invocations, signals) + waste_analysis = engine._analyze_time_waste(scores, invocations) + + prompt = engine._build_reflection_prompt( + scores, invocations, signals, + error_analysis, waste_analysis, avg_score=0.8, + ) + assert "概况" in prompt or "sessions" in prompt + assert "0.800" in prompt + + +# ============================================================================ +# 11. Security — SQL Injection Prevention +# ============================================================================ + +class TestSecurity: + """Test security hardening measures.""" + + def test_sql_injection_rejected_invalid_table(self, _tmp_evolution_db): + """Table names not in the whitelist must raise ValueError.""" + with pytest.raises(ValueError, match="Invalid table name"): + _tmp_evolution_db.insert("users; DROP TABLE users--", {"id": 1}) + + def test_sql_injection_rejected_in_fetch(self, _tmp_evolution_db): + with pytest.raises(ValueError, match="Invalid table name"): + _tmp_evolution_db.fetch_one("nonexistent_table") + + def test_sql_injection_rejected_in_update(self, _tmp_evolution_db): + with pytest.raises(ValueError, match="Invalid table name"): + _tmp_evolution_db.update( + "evil_table", {"x": 1}, where="1=1", + ) + + def test_sql_injection_rejected_in_insert_many(self, _tmp_evolution_db): + with pytest.raises(ValueError, match="Invalid table name"): + _tmp_evolution_db.insert_many("bad_table", [{"x": 1}]) + + def test_sql_injection_rejected_in_fetch_all(self, _tmp_evolution_db): + with pytest.raises(ValueError, match="Invalid table name"): + _tmp_evolution_db.fetch_all("no_such_table") + + def test_limit_coerced_to_int(self, _tmp_evolution_db): + """Non-integer limit values should be safely coerced.""" + _tmp_evolution_db.insert("tool_invocations", { + "session_id": "s1", "tool_name": "bash", + "duration_ms": 100, "success": True, "turn_number": 0, + }) + # Pass a string-ish limit; int() coercion should handle it + rows = _tmp_evolution_db.fetch_all( + "tool_invocations", 
limit="1",
+        )
+        assert len(rows) == 1
+
+    def test_valid_tables_still_work(self, _tmp_evolution_db):
+        """All legitimate tables should pass validation."""
+        _tmp_evolution_db.insert("tool_invocations", {
+            "session_id": "s-ok", "tool_name": "bash",
+            "duration_ms": 100, "success": True, "turn_number": 0,
+        })
+        _tmp_evolution_db.insert("outcome_signals", {
+            "session_id": "s-ok", "signal_type": "test",
+            "signal_value": 1.0,
+        })
+        rows = _tmp_evolution_db.fetch_all("tool_invocations")
+        assert len(rows) == 1
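The security tests above assume the DB layer validates table names against a whitelist before interpolating them into SQL (names cannot be bound as parameters, so interpolation is unavoidable). A minimal sketch of such a guard, under stated assumptions: `_VALID_TABLES`, `_check_table`, and the exact table set are hypothetical; only the `Invalid table name` error text and the `int()` coercion of `limit` are taken from the tests.

```python
import sqlite3

# Hypothetical whitelist — the tests only require that unknown names are rejected.
_VALID_TABLES = frozenset({
    "tool_invocations",
    "outcome_signals",
    "session_scores",
})


def _check_table(table: str) -> str:
    """Reject any table name outside the whitelist before SQL interpolation."""
    if table not in _VALID_TABLES:
        raise ValueError(f"Invalid table name: {table!r}")
    return table


def fetch_all(conn: sqlite3.Connection, table: str, limit=None):
    """SELECT * from a whitelisted table, with an int-coerced LIMIT."""
    # Table names cannot be SQL parameters, so they are validated and then
    # interpolated; all values still go through ? placeholders.
    sql = f"SELECT * FROM {_check_table(table)}"
    params = ()
    if limit is not None:
        sql += " LIMIT ?"
        params = (int(limit),)  # coerce string-ish limits, as the tests expect
    return conn.execute(sql, params).fetchall()
```

An injection attempt such as `fetch_all(conn, "users; DROP TABLE users--")` fails the whitelist check and raises `ValueError` before any SQL is built, which is what `TestSecurity` asserts via `pytest.raises(ValueError, match="Invalid table name")`.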