This commit is contained in:
玉冰 2026-04-25 07:59:18 +08:00 committed by GitHub
commit c90dd4860f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
23 changed files with 6173 additions and 0 deletions

View file

@ -0,0 +1,911 @@
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Hermes Agent 自我优化与持续进化系统设计</title>
<style>
:root {
--bg: #0f1117;
--bg-card: #1a1d2e;
--bg-card2: #232740;
--border: #2d3250;
--text: #e2e8f0;
--text-dim: #94a3b8;
--accent: #6366f1;
--accent2: #8b5cf6;
--green: #10b981;
--green-dim: rgba(16,185,129,0.15);
--amber: #f59e0b;
--amber-dim: rgba(245,158,11,0.15);
--red: #ef4444;
--red-dim: rgba(239,68,68,0.15);
--blue: #3b82f6;
--blue-dim: rgba(59,130,246,0.15);
--cyan: #06b6d4;
--pink: #ec4899;
}
* { margin:0; padding:0; box-sizing:border-box; }
body {
background: var(--bg);
color: var(--text);
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
line-height: 1.6;
padding: 2rem;
max-width: 1200px;
margin: 0 auto;
}
h1 { font-size: 2rem; font-weight: 700; margin-bottom: 0.5rem; }
h2 { font-size: 1.5rem; font-weight: 600; margin: 2.5rem 0 1rem; color: var(--accent); }
h3 { font-size: 1.15rem; font-weight: 600; margin: 1.5rem 0 0.75rem; }
p { color: var(--text-dim); margin-bottom: 1rem; }
.subtitle { color: var(--text-dim); font-size: 1.05rem; margin-bottom: 2rem; }
/* Hero */
.hero {
background: linear-gradient(135deg, #1e1b4b 0%, #0f172a 50%, #0c1220 100%);
border: 1px solid var(--border);
border-radius: 16px;
padding: 3rem;
margin-bottom: 2rem;
position: relative;
overflow: hidden;
}
.hero::before {
content: '';
position: absolute;
top: -50%;
right: -20%;
width: 500px;
height: 500px;
background: radial-gradient(circle, rgba(99,102,241,0.12) 0%, transparent 70%);
pointer-events: none;
}
.hero h1 { position: relative; }
.hero .subtitle { position: relative; }
.badge-row { display: flex; gap: 0.5rem; flex-wrap: wrap; margin-top: 1.5rem; position: relative; }
.badge {
display: inline-flex;
align-items: center;
gap: 0.35rem;
padding: 0.3rem 0.75rem;
border-radius: 999px;
font-size: 0.8rem;
font-weight: 500;
}
.badge-purple { background: rgba(139,92,246,0.15); color: #a78bfa; border: 1px solid rgba(139,92,246,0.25); }
.badge-green { background: var(--green-dim); color: var(--green); border: 1px solid rgba(16,185,129,0.25); }
.badge-blue { background: var(--blue-dim); color: var(--blue); border: 1px solid rgba(59,130,246,0.25); }
.badge-amber { background: var(--amber-dim); color: var(--amber); border: 1px solid rgba(245,158,11,0.25); }
/* Cards */
.card {
background: var(--bg-card);
border: 1px solid var(--border);
border-radius: 12px;
padding: 1.5rem;
margin-bottom: 1.5rem;
}
.card-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(320px, 1fr)); gap: 1.5rem; }
/* Architecture Diagram */
.arch-container {
background: var(--bg-card);
border: 1px solid var(--border);
border-radius: 16px;
padding: 2rem;
margin: 2rem 0;
overflow-x: auto;
}
.arch-flow {
display: flex;
align-items: center;
justify-content: center;
gap: 0.5rem;
flex-wrap: wrap;
min-width: 700px;
}
.arch-node {
display: flex;
flex-direction: column;
align-items: center;
gap: 0.35rem;
padding: 1rem 1.25rem;
border-radius: 12px;
min-width: 110px;
text-align: center;
position: relative;
transition: transform 0.2s;
}
.arch-node:hover { transform: translateY(-3px); }
.arch-node .icon { font-size: 1.5rem; }
.arch-node .label { font-size: 0.85rem; font-weight: 600; }
.arch-node .desc { font-size: 0.7rem; color: var(--text-dim); }
.node-observe { background: var(--blue-dim); border: 1px solid rgba(59,130,246,0.3); }
.node-evaluate { background: rgba(139,92,246,0.12); border: 1px solid rgba(139,92,246,0.3); }
.node-reflect { background: rgba(6,182,212,0.12); border: 1px solid rgba(6,182,212,0.3); }
.node-learn { background: var(--green-dim); border: 1px solid rgba(16,185,129,0.3); }
.node-evolve { background: var(--amber-dim); border: 1px solid rgba(245,158,11,0.3); }
.node-data { background: rgba(236,72,153,0.1); border: 1px solid rgba(236,72,153,0.25); }
.arch-arrow {
font-size: 1.5rem;
color: var(--text-dim);
flex-shrink: 0;
}
/* Timeline */
.timeline {
position: relative;
padding-left: 2.5rem;
margin: 2rem 0;
}
.timeline::before {
content: '';
position: absolute;
left: 0.75rem;
top: 0;
bottom: 0;
width: 2px;
background: linear-gradient(to bottom, var(--accent), var(--cyan), var(--green), var(--amber));
}
.tl-item {
position: relative;
margin-bottom: 2rem;
padding: 1.25rem 1.5rem;
background: var(--bg-card);
border: 1px solid var(--border);
border-radius: 12px;
}
.tl-item::before {
content: '';
position: absolute;
left: -2.05rem;
top: 1.4rem;
width: 12px;
height: 12px;
border-radius: 50%;
border: 2px solid var(--accent);
background: var(--bg);
}
.tl-item.night::before { border-color: var(--cyan); }
.tl-item.morning::before { border-color: var(--green); }
.tl-item.action::before { border-color: var(--amber); }
.tl-item .tl-time {
font-size: 0.8rem;
font-weight: 600;
color: var(--cyan);
margin-bottom: 0.35rem;
}
.tl-item.morning .tl-time { color: var(--green); }
.tl-item.action .tl-time { color: var(--amber); }
.tl-item .tl-title { font-weight: 600; margin-bottom: 0.5rem; }
.tl-item .tl-desc { font-size: 0.9rem; color: var(--text-dim); }
/* Flowchart-style dream */
.flow-box {
display: flex;
flex-direction: column;
gap: 0.5rem;
}
.flow-step {
display: flex;
align-items: flex-start;
gap: 1rem;
padding: 1rem;
background: var(--bg-card2);
border-radius: 8px;
border-left: 3px solid var(--accent);
}
.flow-step.step-error { border-left-color: var(--red); }
.flow-step.step-waste { border-left-color: var(--amber); }
.flow-step.step-model { border-left-color: var(--cyan); }
.flow-step.step-output { border-left-color: var(--green); }
.flow-step .step-num {
flex-shrink: 0;
width: 28px;
height: 28px;
display: flex;
align-items: center;
justify-content: center;
border-radius: 50%;
background: var(--accent);
color: #fff;
font-size: 0.8rem;
font-weight: 700;
}
.flow-step.step-error .step-num { background: var(--red); }
.flow-step.step-waste .step-num { background: var(--amber); }
.flow-step.step-model .step-num { background: var(--cyan); }
.flow-step.step-output .step-num { background: var(--green); }
.flow-step .step-content { flex: 1; }
.flow-step .step-title { font-weight: 600; font-size: 0.95rem; margin-bottom: 0.25rem; }
.flow-step .step-desc { font-size: 0.85rem; color: var(--text-dim); }
.flow-step ul { margin: 0.35rem 0 0 1rem; font-size: 0.85rem; color: var(--text-dim); }
.flow-step li { margin-bottom: 0.15rem; }
/* Feishu mockup */
.feishu-card {
background: #fff;
border-radius: 12px;
padding: 1.5rem;
color: #1f2937;
max-width: 420px;
margin: 1.5rem auto;
box-shadow: 0 4px 24px rgba(0,0,0,0.3);
font-size: 0.9rem;
}
.feishu-card .fc-header {
display: flex;
align-items: center;
gap: 0.5rem;
padding-bottom: 0.75rem;
border-bottom: 1px solid #e5e7eb;
margin-bottom: 0.75rem;
}
.feishu-card .fc-header .fc-icon {
width: 32px; height: 32px;
background: linear-gradient(135deg, #3b82f6, #8b5cf6);
border-radius: 8px;
display: flex;
align-items: center;
justify-content: center;
color: #fff;
font-size: 1rem;
}
.feishu-card .fc-header .fc-title { font-weight: 600; }
.feishu-card .fc-section { margin-bottom: 0.75rem; }
.feishu-card .fc-section-title { font-weight: 600; font-size: 0.85rem; margin-bottom: 0.35rem; color: #374151; }
.feishu-card .fc-row { display: flex; justify-content: space-between; font-size: 0.8rem; color: #6b7280; padding: 0.1rem 0; }
.feishu-card .fc-proposal {
background: #f9fafb;
border-radius: 8px;
padding: 0.75rem;
margin-bottom: 0.5rem;
}
.feishu-card .fc-proposal-title { font-weight: 600; font-size: 0.85rem; margin-bottom: 0.25rem; }
.feishu-card .fc-proposal-desc { font-size: 0.78rem; color: #6b7280; margin-bottom: 0.5rem; }
.feishu-card .fc-btns { display: flex; gap: 0.5rem; }
.feishu-card .fc-btn {
padding: 0.3rem 0.75rem;
border-radius: 6px;
font-size: 0.78rem;
font-weight: 500;
border: none;
cursor: pointer;
}
.fc-btn-approve { background: #3b82f6; color: #fff; }
.fc-btn-modify { background: #f3f4f6; color: #374151; border: 1px solid #d1d5db; }
.fc-btn-reject { background: #fef2f2; color: #ef4444; border: 1px solid #fecaca; }
/* Ref table */
.ref-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 1rem; }
.ref-card {
background: var(--bg-card2);
border: 1px solid var(--border);
border-radius: 10px;
padding: 1.25rem;
}
.ref-card .ref-source {
font-size: 0.75rem;
color: var(--cyan);
margin-bottom: 0.5rem;
font-family: 'SF Mono', monospace;
}
.ref-card .ref-title { font-weight: 600; margin-bottom: 0.5rem; }
.ref-card .ref-desc { font-size: 0.85rem; color: var(--text-dim); }
/* DB schema */
.db-table {
background: var(--bg-card2);
border: 1px solid var(--border);
border-radius: 8px;
padding: 1rem;
margin-bottom: 1rem;
font-family: 'SF Mono', 'Fira Code', monospace;
font-size: 0.8rem;
}
.db-table .db-name {
color: var(--cyan);
font-weight: 700;
margin-bottom: 0.5rem;
}
.db-table .db-col { color: var(--text-dim); padding: 0.1rem 0; }
.db-table .db-col span { color: var(--amber); }
/* Safety */
.safety-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(240px, 1fr)); gap: 1rem; }
.safety-item {
background: var(--bg-card2);
border: 1px solid var(--border);
border-radius: 10px;
padding: 1.25rem;
text-align: center;
}
.safety-item .safety-icon { font-size: 2rem; margin-bottom: 0.5rem; }
.safety-item .safety-title { font-weight: 600; font-size: 0.95rem; margin-bottom: 0.35rem; }
.safety-item .safety-desc { font-size: 0.82rem; color: var(--text-dim); }
/* File tree */
.file-tree {
font-family: 'SF Mono', 'Fira Code', monospace;
font-size: 0.82rem;
line-height: 1.8;
color: var(--text-dim);
background: var(--bg-card2);
border: 1px solid var(--border);
border-radius: 8px;
padding: 1.25rem;
overflow-x: auto;
}
.file-tree .dir { color: var(--cyan); font-weight: 600; }
.file-tree .file { color: var(--text); }
.file-tree .comment { color: var(--text-dim); font-style: italic; }
/* Quality formula */
.formula {
background: var(--bg-card2);
border: 1px solid var(--border);
border-radius: 10px;
padding: 1.5rem 2rem;
margin: 1rem 0;
font-family: 'SF Mono', 'Fira Code', monospace;
font-size: 0.88rem;
text-align: center;
line-height: 2;
}
.formula .w { color: var(--amber); }
.formula .var { color: var(--cyan); }
.formula .op { color: var(--text-dim); }
/* Integration table */
.int-table {
width: 100%;
border-collapse: collapse;
margin: 1rem 0;
font-size: 0.88rem;
}
.int-table th {
text-align: left;
padding: 0.75rem 1rem;
background: var(--bg-card2);
color: var(--text-dim);
font-weight: 600;
font-size: 0.8rem;
text-transform: uppercase;
letter-spacing: 0.05em;
}
.int-table td {
padding: 0.65rem 1rem;
border-bottom: 1px solid var(--border);
}
.int-table .hook {
font-family: 'SF Mono', monospace;
font-size: 0.8rem;
color: var(--cyan);
background: rgba(6,182,212,0.1);
padding: 0.15rem 0.5rem;
border-radius: 4px;
}
.int-table .no-mod { color: var(--green); }
/* Phase timeline */
.phases { display: grid; grid-template-columns: repeat(4, 1fr); gap: 1rem; margin: 1.5rem 0; }
.phase {
background: var(--bg-card2);
border: 1px solid var(--border);
border-radius: 10px;
padding: 1.25rem;
position: relative;
}
.phase .phase-num {
font-size: 2rem;
font-weight: 800;
color: var(--accent);
opacity: 0.3;
margin-bottom: 0.25rem;
}
.phase .phase-title { font-weight: 600; font-size: 0.95rem; margin-bottom: 0.5rem; }
.phase ul { margin-left: 1rem; font-size: 0.82rem; color: var(--text-dim); }
.phase li { margin-bottom: 0.25rem; }
/* Arrow connector between phases */
.phase:not(:last-child)::after {
content: '→';
position: absolute;
right: -1.2rem;
top: 50%;
transform: translateY(-50%);
font-size: 1.5rem;
color: var(--text-dim);
}
/* Scrollbar */
::-webkit-scrollbar { width: 6px; height: 6px; }
::-webkit-scrollbar-track { background: transparent; }
::-webkit-scrollbar-thumb { background: var(--border); border-radius: 3px; }
@media (max-width: 768px) {
body { padding: 1rem; }
.phases { grid-template-columns: 1fr 1fr; }
.phase:not(:last-child)::after { display: none; }
.arch-flow { flex-direction: column; }
.arch-arrow { transform: rotate(90deg); }
}
</style>
</head>
<body>
<!-- ═══════ Hero ═══════ -->
<div class="hero">
<h1>Hermes Agent 自我优化与持续进化系统</h1>
<p class="subtitle">一套完全插件化的 agent 自我进化机制 — 通过每日"梦境整理"和"飞书审批流"实现闭环自我优化</p>
<div class="badge-row">
<span class="badge badge-purple">零侵入核心代码</span>
<span class="badge badge-blue">完全插件化</span>
<span class="badge badge-green">GLM-5.1 / Qwen 降级</span>
<span class="badge badge-amber">飞书审批流</span>
</div>
</div>
<!-- ═══════ Architecture ═══════ -->
<h2>核心架构:五层闭环</h2>
<p>观察 → 评估 → 反思 → 学习 → 进化,形成持续自我改进的闭环循环。</p>
<div class="arch-container">
<div class="arch-flow">
<div class="arch-node node-observe">
<span class="icon">📡</span>
<span class="label">观察</span>
<span class="desc">遥测采集<br>post_tool_call</span>
</div>
<span class="arch-arrow">→</span>
<div class="arch-node node-evaluate">
<span class="icon">📊</span>
<span class="label">评估</span>
<span class="desc">质量评分<br>on_session_end</span>
</div>
<span class="arch-arrow">→</span>
<div class="arch-node node-reflect">
<span class="icon">🌙</span>
<span class="label">反思</span>
<span class="desc">梦境整理<br>凌晨 1:00</span>
</div>
<span class="arch-arrow">→</span>
<div class="arch-node node-learn">
<span class="icon">🧠</span>
<span class="label">学习</span>
<span class="desc">进化提案<br>策略生成</span>
</div>
<span class="arch-arrow">→</span>
<div class="arch-node node-evolve">
<span class="icon">🚀</span>
<span class="label">进化</span>
<span class="desc">飞书审批 → 执行<br>19:00 推送</span>
</div>
<span class="arch-arrow">→</span>
<div class="arch-node node-data">
<span class="icon">💾</span>
<span class="label">存储</span>
<span class="desc">evolution.db<br>strategies.json</span>
</div>
</div>
</div>
<!-- ═══════ Daily Flow ═══════ -->
<h2>每日流程</h2>
<p>从凌晨梦境整理到晚间飞书推送,一天的自动进化循环。</p>
<div class="timeline">
<div class="tl-item night">
<div class="tl-time">01:00 — 梦境整理(自动执行)</div>
<div class="tl-title">DreamEngine.run() — 分析前日全部 session</div>
<div class="flow-box" style="margin-top: 1rem;">
<div class="flow-step">
<div class="step-num">1</div>
<div class="step-content">
<div class="step-title">数据汇总</div>
<div class="step-desc">读取 state.db(只读)+ evolution.db,计算各 session 质量评分</div>
</div>
</div>
<div class="flow-step step-error">
<div class="step-num">2</div>
<div class="step-content">
<div class="step-title">错误分析(重点)</div>
<ul>
<li>工具调用失败统计(按工具、按错误类型分布)</li>
<li>反复重试检测(同一工具同一 session 调用 > 2次)</li>
<li>未完成 session、用户纠正消息、API 错误</li>
<li>错误连锁分析(一个失败是否引发后续失败)</li>
</ul>
</div>
</div>
<div class="flow-step step-waste">
<div class="step-num">3</div>
<div class="step-content">
<div class="step-title">时间浪费分析(重点)</div>
<ul>
<li>耗时最长的工具调用 TOP 10</li>
<li>重复操作(多次读同一文件、重复搜索)</li>
<li>低效 session(迭代轮数过多、工具调用过多)</li>
<li>可缩短的工具调用链</li>
</ul>
</div>
</div>
<div class="flow-step step-model">
<div class="step-num">4</div>
<div class="step-content">
<div class="step-title">深度反思(GLM-5.1 优先 / Qwen 降级)</div>
<div class="step-desc">将分析结果发送到本地模型,产出结构化 ReflectionReport(错误根因 + 浪费根因 + 可操作建议)</div>
</div>
</div>
<div class="flow-step step-output">
<div class="step-num">5</div>
<div class="step-content">
<div class="step-title">模式识别 + 生成进化提案</div>
<div class="step-desc">高成功率模式 → 候选技能;重复错误 → 候选规避策略;系统性浪费 → 候选流程优化</div>
</div>
</div>
</div>
</div>
<div class="tl-item morning">
<div class="tl-time">19:00 — 飞书推送进化方案</div>
<div class="tl-title">FeishuNotifier.send_daily_report()</div>
<div class="tl-desc">读取当日凌晨产出的 pending_approval 提案,格式化为飞书交互卡片推送给用户。</div>
</div>
<div class="tl-item action">
<div class="tl-time">用户审批后 — 执行进化</div>
<div class="tl-title">EvolutionExecutor.execute()</div>
<div class="tl-desc">飞书回调触发执行:技能创建 / 策略调整 / 记忆更新 / 工具偏好变更。执行后自动创建 A/B 测试追踪单元。</div>
</div>
</div>
<!-- ═══════ Feishu Mockup ═══════ -->
<h3>飞书卡片消息预览</h3>
<div class="feishu-card">
<div class="fc-header">
<div class="fc-icon">🌅</div>
<div>
<div class="fc-title">Hermes 每日进化报告 (2026-04-18)</div>
</div>
</div>
<div class="fc-section">
<div class="fc-section-title">📊 前日概况</div>
<div class="fc-row"><span>完成 sessions</span><span>23</span></div>
<div class="fc-row"><span>平均质量评分</span><span>0.78 ↑0.03</span></div>
<div class="fc-row"><span>工具调用 / 成功率</span><span>156次 / 91%</span></div>
</div>
<div class="fc-section">
<div class="fc-section-title">❌ 错误分析</div>
<div class="fc-row"><span>browser_tool 失败</span><span>5次 (超时3次)</span></div>
<div class="fc-row"><span>未完成 session</span><span>2个</span></div>
<div class="fc-row"><span>用户纠正</span><span>3次</span></div>
</div>
<div class="fc-section">
<div class="fc-section-title">⏱️ 时间浪费分析</div>
<div class="fc-row"><span>重复读取同一文件</span><span>8次</span></div>
<div class="fc-row"><span>web_search→browser 冗余</span><span>6次</span></div>
<div class="fc-row"><span>平均迭代轮数</span><span>12轮 (理想8轮)</span></div>
</div>
<hr style="border-color:#e5e7eb; margin:0.75rem 0;">
<div class="fc-section">
<div class="fc-section-title">📋 进化提案 (3项)</div>
<div class="fc-proposal">
<div class="fc-proposal-title">[1] 🛠️ 创建技能: web_search_pipeline</div>
<div class="fc-proposal-desc">预期: 搜索任务成功率 +15% 风险: low</div>
<div class="fc-btns">
<button class="fc-btn fc-btn-approve">通过</button>
<button class="fc-btn fc-btn-modify">修改</button>
<button class="fc-btn fc-btn-reject">拒绝</button>
</div>
</div>
<div class="fc-proposal">
<div class="fc-proposal-title">[2] ⚡ 策略调整: 优先 grep 替代 find</div>
<div class="fc-proposal-desc">预期: 文件搜索效率 +25% 风险: low</div>
<div class="fc-btns">
<button class="fc-btn fc-btn-approve">通过</button>
<button class="fc-btn fc-btn-modify">修改</button>
<button class="fc-btn fc-btn-reject">拒绝</button>
</div>
</div>
<div class="fc-proposal">
<div class="fc-proposal-title">[3] 🧠 记忆更新: 用户偏好中文回复</div>
<div class="fc-proposal-desc">预期: 用户满意度提升 风险: low</div>
<div class="fc-btns">
<button class="fc-btn fc-btn-approve">通过</button>
<button class="fc-btn fc-btn-modify">修改</button>
<button class="fc-btn fc-btn-reject">拒绝</button>
</div>
</div>
</div>
</div>
<!-- ═══════ Quality Score ═══════ -->
<h2>质量评分体系</h2>
<p>每个 session 结束时自动计算复合质量评分,零 API 成本。</p>
<div class="formula">
<span class="var">session_quality</span> <span class="op">=</span>
<span class="w">0.40</span> × <span class="var">completion_rate</span> <span class="op">+</span>
<span class="w">0.20</span> × <span class="var">efficiency_score</span> <span class="op">+</span>
<span class="w">0.15</span> × <span class="var">cost_efficiency</span> <span class="op">+</span>
<span class="w">0.25</span> × <span class="var">satisfaction_proxy</span>
</div>
<div class="card-grid">
<div class="card">
<h3>completion_rate <span style="color:var(--amber);font-size:0.8rem;">权重 0.40</span></h3>
<p>任务是否完成。completed=1.0, interrupted=0.5, failed=0.0</p>
</div>
<div class="card">
<h3>efficiency_score <span style="color:var(--amber);font-size:0.8rem;">权重 0.20</span></h3>
<p>迭代效率。理想轮数 / 实际轮数,上限 1.0</p>
</div>
<div class="card">
<h3>cost_efficiency <span style="color:var(--amber);font-size:0.8rem;">权重 0.15</span></h3>
<p>工具使用效率。期望调用数 / 实际调用数,上限 1.0</p>
</div>
<div class="card">
<h3>satisfaction_proxy <span style="color:var(--amber);font-size:0.8rem;">权重 0.25</span></h3>
<p>满意度代理。单轮完成=0.9, 多轮完成=0.75, 预算耗尽=-0.2</p>
</div>
</div>
<!-- ═══════ Claude Code References ═══════ -->
<h2>Claude Code 设计参考</h2>
<p>本方案借鉴了 Claude Code 开源项目中的四个核心设计模式。</p>
<div class="ref-grid">
<div class="ref-card">
<div class="ref-source">plugins/hookify/agents/conversation-analyzer.md</div>
<div class="ref-title">梦境整理 ← conversation-analyzer</div>
<div class="ref-desc">
分析对话历史 → 识别纠正/沮丧/重复问题信号 → 提取可匹配正则规则 → 按严重程度分级(高/中/低)。
<br><br><b>我们的扩展</b>:从手动触发升级为每日自动运行,增加错误分析和时间浪费分析。
</div>
</div>
<div class="ref-card">
<div class="ref-source">plugins/ralph-wiggum/</div>
<div class="ref-title">进化执行 ← Ralph Wiggum</div>
<div class="ref-desc">
自我引用反馈环Stop hook 拦截退出 → 重喂 prompt → agent 看到自己的修改 → 自动迭代直到满足条件。
<br><br><b>我们的扩展</b>:进化执行后创建验证追踪单元(类似 completion_promise),不满足条件自动回滚。
</div>
</div>
<div class="ref-card">
<div class="ref-source">plugins/learning-output-style/</div>
<div class="ref-title">策略注入 ← SessionStart hook</div>
<div class="ref-desc">
通过 SessionStart hook 在每个 session 自动注入行为上下文,等效于 CLAUDE.md 但更灵活。
<br><br><b>我们的扩展</b>:使用 pre_llm_call 钩子注入已学习的行为提示,完全隔离于核心代码。
</div>
</div>
<div class="ref-card">
<div class="ref-source">plugins/hookify/core/rule_engine.py</div>
<div class="ref-title">规则引擎 ← rule_engine</div>
<div class="ref-desc">
LRU 缓存编译正则(128 上限),支持 regex_match/contains/equals/not_contains,区分 block/warn 级别。
<br><br><b>我们的扩展</b>:策略注入条件化,根据 session 特征(平台/任务类型/模型)匹配最相关规则。
</div>
</div>
</div>
<!-- ═══════ Isolation ═══════ -->
<h2>隔离策略:零侵入核心代码</h2>
<p>所有功能以插件形式实现,通过钩子集成,不修改任何上游核心文件。</p>
<div class="card-grid">
<div class="card">
<h3>插件文件结构</h3>
<div class="file-tree">
<span class="dir">self_evolution/</span>
├── plugin.yaml
├── __init__.py <span class="comment"># register(ctx)</span>
├── db.py <span class="comment"># 独立 SQLite</span>
├── hooks.py <span class="comment"># 3个钩子</span>
├── quality_scorer.py <span class="comment"># 质量评分</span>
├── <span class="dir">reflection_engine.py</span> <span class="comment"># 梦境整理</span>
├── rule_engine.py <span class="comment"># 条件匹配</span>
├── evolution_proposer.py
├── evolution_executor.py
├── feishu_notifier.py
├── strategy_injector.py
├── strategy_store.py
├── cron_jobs.py
├── models.py
├── <span class="dir">agents/</span>
│ ├── dream_analyzer.md
│ └── evolution_planner.md
└── <span class="dir">prompts/</span>
└── reflection.md
</div>
</div>
<div class="card">
<h3>钩子集成方式</h3>
<table class="int-table">
<tr><th>功能</th><th>集成方式</th><th>修改核心</th></tr>
<tr><td>工具调用遥测</td><td><span class="hook">post_tool_call</span></td><td class="no-mod">NO</td></tr>
<tr><td>Session 评分</td><td><span class="hook">on_session_end</span></td><td class="no-mod">NO</td></tr>
<tr><td>策略注入</td><td><span class="hook">pre_llm_call</span></td><td class="no-mod">NO</td></tr>
<tr><td>定时任务</td><td>cron/jobs.json</td><td class="no-mod">NO</td></tr>
<tr><td>飞书通知</td><td>gateway/ 飞书网关</td><td class="no-mod">NO</td></tr>
<tr><td>技能创建</td><td>skill_manager_tool</td><td class="no-mod">NO</td></tr>
<tr><td>记忆更新</td><td>memory_tool</td><td class="no-mod">NO</td></tr>
<tr><td>历史数据</td><td>state.db 只读</td><td class="no-mod">NO</td></tr>
</table>
</div>
</div>
<!-- ═══════ Database ═══════ -->
<h2>独立数据库设计</h2>
<p>独立于核心 state.db,7 张表,存储于 <code>~/.hermes/self_evolution/evolution.db</code></p>
<div class="card-grid" style="grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));">
<div class="db-table">
<div class="db-name">tool_invocations</div>
<div class="db-col">session_id <span>TEXT</span></div>
<div class="db-col">tool_name <span>TEXT</span></div>
<div class="db-col">duration_ms <span>INT</span></div>
<div class="db-col">success <span>BOOL</span></div>
<div class="db-col">error_type <span>TEXT</span></div>
</div>
<div class="db-table">
<div class="db-name">session_scores</div>
<div class="db-col">session_id <span>TEXT PK</span></div>
<div class="db-col">composite_score <span>REAL</span></div>
<div class="db-col">completion_rate <span>REAL</span></div>
<div class="db-col">efficiency_score <span>REAL</span></div>
<div class="db-col">task_category <span>TEXT</span></div>
</div>
<div class="db-table">
<div class="db-name">outcome_signals</div>
<div class="db-col">session_id <span>TEXT</span></div>
<div class="db-col">signal_type <span>TEXT</span></div>
<div class="db-col">signal_value <span>REAL</span></div>
<div class="db-col">metadata <span>TEXT JSON</span></div>
</div>
<div class="db-table">
<div class="db-name">reflection_reports</div>
<div class="db-col">sessions_analyzed <span>INT</span></div>
<div class="db-col">avg_score <span>REAL</span></div>
<div class="db-col">error_summary <span>TEXT</span></div>
<div class="db-col">worst_patterns <span>TEXT JSON</span></div>
<div class="db-col">recommendations <span>TEXT JSON</span></div>
</div>
<div class="db-table">
<div class="db-name">evolution_proposals</div>
<div class="db-col">id <span>TEXT PK</span></div>
<div class="db-col">proposal_type <span>TEXT</span></div>
<div class="db-col">title, description <span>TEXT</span></div>
<div class="db-col">status <span>TEXT</span> <span style="color:var(--green);">pending→approved→executed</span></div>
</div>
<div class="db-table">
<div class="db-name">improvement_units</div>
<div class="db-col">proposal_id <span>TEXT FK</span></div>
<div class="db-col">baseline_score <span>REAL</span></div>
<div class="db-col">current_score <span>REAL</span></div>
<div class="db-col">status <span>TEXT</span> <span style="color:var(--green);">active→promoted</span> / <span style="color:var(--red);">reverted</span></div>
</div>
<div class="db-table">
<div class="db-name">strategy_versions</div>
<div class="db-col">version <span>INT</span></div>
<div class="db-col">strategies_json <span>TEXT</span></div>
<div class="db-col">avg_score <span>REAL</span></div>
<div class="db-col">active_from / active_until <span>REAL</span></div>
</div>
</div>
<!-- ═══════ Safety ═══════ -->
<h2>安全机制:防止退化漂移</h2>
<p>六层防护确保进化方向正确且可回滚。</p>
<div class="safety-grid">
<div class="safety-item">
<div class="safety-icon">🗄️</div>
<div class="safety-title">独立数据库</div>
<div class="safety-desc">不碰 state.db上游 schema 变更无影响</div>
</div>
<div class="safety-item">
<div class="safety-icon">🔒</div>
<div class="safety-title">只读核心</div>
<div class="safety-desc">所有集成通过钩子完成,不修改核心文件</div>
</div>
<div class="safety-item">
<div class="safety-icon">🚧</div>
<div class="safety-title">人工闸门</div>
<div class="safety-desc">进化方案必须通过飞书审批,不自动执行</div>
</div>
<div class="safety-item">
<div class="safety-icon">⏪</div>
<div class="safety-title">版本回滚</div>
<div class="safety-desc">策略变更版本化,评分连续下降自动回滚</div>
</div>
<div class="safety-item">
<div class="safety-icon">🛡️</div>
<div class="safety-title">有界变更</div>
<div class="safety-desc">只能写 PERFORMANCE.md、创建 learned skills</div>
</div>
<div class="safety-item">
<div class="safety-icon">📚</div>
<div class="safety-title">拒绝学习</div>
<div class="safety-desc">被拒绝的提案会被分析,避免重复提出</div>
</div>
</div>
<!-- ═══════ Implementation Phases ═══════ -->
<h2>实施路径</h2>
<p>四个阶段,每阶段约 1 周。</p>
<div class="phases">
<div class="phase">
<div class="phase-num">01</div>
<div class="phase-title">基础设施</div>
<ul>
<li>插件骨架</li>
<li>独立数据库 db.py</li>
<li>遥测采集 hooks.py</li>
<li>质量评分器</li>
</ul>
</div>
<div class="phase">
<div class="phase-num">02</div>
<div class="phase-title">梦境整理</div>
<ul>
<li>反思引擎 reflection_engine.py</li>
<li>错误分析 + 时间浪费分析</li>
<li>进化提案生成器</li>
<li>凌晨 1:00 cron 注册</li>
</ul>
</div>
<div class="phase">
<div class="phase-num">03</div>
<div class="phase-title">飞书审批</div>
<ul>
<li>飞书通知器 feishu_notifier.py</li>
<li>卡片消息 + 按钮回调</li>
<li>19:00 cron 注册</li>
</ul>
</div>
<div class="phase">
<div class="phase-num">04</div>
<div class="phase-title">进化执行</div>
<ul>
<li>进化执行器 + 回滚</li>
<li>策略注入 + 规则引擎</li>
<li>策略存储 + 版本管理</li>
<li>A/B 测试追踪</li>
</ul>
</div>
</div>
<!-- ═══════ Model Config ═══════ -->
<h2>模型配置</h2>
<div class="card">
<div class="file-tree">
<span class="comment"># ~/.hermes/self_evolution/config.yaml</span>
<span class="var">model:</span>
<span class="var">primary:</span>
<span class="var">provider:</span> <span style="color:var(--green);">"zhipu"</span> <span class="comment"># 优先使用 GLM-5.1</span>
<span class="var">model:</span> <span style="color:var(--green);">"glm-5.1"</span>
<span class="var">fallback:</span>
<span class="var">provider:</span> <span style="color:var(--cyan);">"ollama"</span> <span class="comment"># GLM 不可用时降级到本地 Qwen</span>
<span class="var">model:</span> <span style="color:var(--cyan);">"qwen3:32b"</span>
<span class="var">base_url:</span> <span style="color:var(--cyan);">"http://localhost:11434"</span>
<span class="var">schedule:</span>
<span class="var">dream_time:</span> <span style="color:var(--amber);">"0 1 * * *"</span> <span class="comment"># 凌晨 1:00</span>
<span class="var">propose_time:</span> <span style="color:var(--amber);">"0 19 * * *"</span> <span class="comment"># 当日 19:00</span>
</div>
</div>
<!-- ═══════ Footer ═══════ -->
<div style="margin-top: 4rem; padding-top: 2rem; border-top: 1px solid var(--border); text-align: center; color: var(--text-dim); font-size: 0.85rem;">
<p>Hermes Agent Self-Evolution System — Designed with reference from Claude Code open-source patterns</p>
<p style="margin-top: 0.5rem; font-size: 0.78rem;">conversation-analyzer · Ralph Wiggum · learning-output-style · rule_engine</p>
</div>
</body>
</html>

View file

@ -0,0 +1,43 @@
"""
Self Evolution Plugin
=====================
Agent self-optimization and continuous evolution system.
Architecture:
- Telemetry: collects tool/session data via hooks
- Quality Scorer: evaluates session outcomes
- Dream Engine: nightly reflection at 1:00
- Evolution Proposer: generates improvement proposals
- Feishu Notifier: pushes proposals at 19:00 for user approval
- Evolution Executor: applies approved changes with rollback support
- Strategy Injector: injects learned hints into sessions
Design references from Claude Code:
- conversation-analyzer (hookify): dream analysis pattern
- Ralph Wiggum: iterative evolution with rollback
- learning-output-style: session-start strategy injection
- rule_engine (hookify): conditional strategy matching
"""
from __future__ import annotations
import logging
logger = logging.getLogger(__name__)
def register(ctx) -> None:
    """Plugin entry point — called by the Hermes PluginManager.

    Initializes the plugin's private SQLite database, then registers the
    three hooks (post_tool_call, on_session_end, pre_llm_call) on the
    provided plugin context.

    NOTE(review): the original docstring also claimed three slash commands
    (/evolve, /reflect, /evolution_status) are registered here, but this
    function only initializes the DB and wires up hooks — confirm whether
    the commands are registered elsewhere.

    Args:
        ctx: Hermes plugin context passed through to hook registration.
    """
    # Imports are deferred so merely importing the plugin module stays cheap
    # and side-effect free until registration actually happens.
    from self_evolution.db import init_db
    init_db()
    from self_evolution.hooks import register_all as register_hooks
    register_hooks(ctx)
    logger.info("self_evolution plugin loaded: 3 hooks, telemetry active")

View file

@ -0,0 +1,82 @@
---
name: dream_analyzer
description: >
用于每日梦境整理的分析 agent。
分析前日所有 session 的工具调用、错误模式、时间浪费,
产出结构化的反思报告和进化提案。
model: inherit
tools: ["Read", "Grep"]
---
你是 Hermes Agent 的性能分析专家。你的任务是分析 agent 的运行数据,识别问题和优化机会。
## 分析流程
### 1. 错误信号检测
参考 Claude Code conversation-analyzer 的模式,搜索以下信号:
**显式纠正信号:**
- 用户消息包含 "不对"、"错误"、"重试"、"不要"
- 用户消息包含 "stop"、"wrong"、"retry"、"don't"
**沮丧反应信号:**
- "为什么你做了X"、"那不是我说的"
- "太慢了"、"浪费时间"
**用户回退信号:**
- 用户撤销了 agent 的修改
- 用户手动修复了 agent 的问题
**重复问题:**
- 同类错误在多个 session 中出现
### 2. 错误严重程度分级
**高严重度(应创建规避规则):**
- 系统性工具失败(同一工具多次失败)
- 安全相关问题
- 数据丢失风险
**中严重度(应警告):**
- 效率问题(重复操作、不必要的步骤)
- 风格不一致
- 非关键错误
**低严重度(可选优化):**
- 用户偏好
- 非关键的模式改进
### 3. 时间浪费分析
重点分析:
- 耗时最长的工具调用
- 重复操作(多次读同一文件、重复搜索)
- 工具调用链中的不必要步骤
- 迭代轮数过多的 session
### 4. 输出格式
必须按 JSON 格式输出:
```json
{
"worst_patterns": ["模式描述1", "模式描述2"],
"best_patterns": ["成功模式描述1"],
"tool_insights": {
"tool_name": {"success_rate": 0.95, "avg_duration_ms": 500, "recommendation": "建议"}
},
"recommendations": [
"具体的可操作建议1",
"具体的可操作建议2"
]
}
```
### 5. 质量标准
- 每个建议都必须是具体的、可操作的
- 包含实际的例子
- 解释为什么这个问题值得修复
- 提供可直接使用的规则或策略
- 不要对假设性讨论产生误报

View file

@ -0,0 +1,51 @@
---
name: evolution_planner
description: >
用于将反思报告转化为具体进化方案的规划 agent。
生成技能创建、策略调整、记忆更新等具体方案。
model: inherit
tools: ["Read", "Grep"]
---
你是 Hermes Agent 的进化规划专家。你的任务是将性能分析结论转化为具体的、可执行的进化方案。
## 方案类型
### 技能创建 (skill)
当发现可复用的成功模式时,建议创建新技能:
- 描述技能的触发条件和执行步骤
- 包含具体的 prompt 模板
- 标注适用的场景
### 策略调整 (strategy)
当发现效率问题或错误模式时,建议创建策略规则:
- 定义匹配条件(工具名、平台、任务类型)
- 提供策略提示文本
- 标注严重程度(hint | avoid | prefer)
### 记忆更新 (memory)
当发现关于用户偏好或环境特性时,建议更新记忆:
- 写入 PERFORMANCE.md
- 内容简洁、可操作
- 避免主观判断
### 工具偏好 (tool_preference)
当发现工具使用效率差异时,建议调整偏好:
- 基于数据说明为什么A优于B
- 提供具体的替换建议
## 输出格式
每个方案必须包含:
1. **标题**:简短描述(<50 字)
2. **描述**:详细说明变更内容
3. **预期影响**:定量或定性的改善预期
4. **风险评估**low / medium / high
5. **回滚方案**:如何安全地撤销此变更
## 质量标准
- 每个方案只变更一个变量
- 方案必须是可测量、可回滚的
- 优先高影响、低风险的方案
- 每次最多提出 5 个方案

115
self_evolution/cron_jobs.py Normal file
View file

@ -0,0 +1,115 @@
"""
Self Evolution Plugin Cron Job Registration
==============================================
Registers two cron jobs:
1. dream_time (1:00): Run dream consolidation
2. propose_time (19:00): Push proposals via Feishu
Uses Hermes' existing cron system (cron/jobs.json).
"""
from __future__ import annotations
import json
import logging
from pathlib import Path
from typing import Optional
logger = logging.getLogger(__name__)
from self_evolution.paths import CRON_DIR
CRON_FILE = CRON_DIR / "jobs.json"
DREAM_JOB_ID = "self_evolution_dream"
PROPOSE_JOB_ID = "self_evolution_propose"
def register_cron_jobs():
    """Register the two self_evolution cron jobs if not already present.

    Ensures cron/jobs.json contains a 01:00 dream-consolidation job and a
    19:00 proposal-push job, each configured with the model/provider
    resolved from the Hermes unified runtime config.  Existing jobs
    (matched by id) are left untouched; the file is only rewritten when a
    job was actually added, so repeated calls are idempotent.
    """
    CRON_DIR.mkdir(parents=True, exist_ok=True)
    jobs = _load_jobs()
    # Resolve model config from hermes unified config
    from self_evolution.reflection_engine import _resolve_runtime_config
    runtime = _resolve_runtime_config()
    model = runtime.get("model", "")
    provider = runtime.get("provider", "")
    existing_ids = {j.get("id") for j in jobs}
    added = False
    # Dream consolidation at 1:00
    if DREAM_JOB_ID not in existing_ids:
        jobs.append({
            "id": DREAM_JOB_ID,
            "name": "Self Evolution - Dream Consolidation",
            "prompt": "运行自我进化的梦境整理分析前日session的错误和浪费时间问题生成进化提案。",
            "schedule": "0 1 * * *",
            "model": model,
            "provider": provider,
            "deliver": "[SILENT]",
            "skill": "self_evolution:dream",
        })
        added = True
    # Proposal push at 19:00
    if PROPOSE_JOB_ID not in existing_ids:
        jobs.append({
            "id": PROPOSE_JOB_ID,
            "name": "Self Evolution - Proposal Push",
            "prompt": "推送今日自我进化提案到飞书。",
            "schedule": "0 19 * * *",
            "model": model,
            "provider": provider,
            "deliver": "[SILENT]",
            "skill": "self_evolution:propose",
        })
        added = True
    # Only touch the file when something changed (avoids needless rewrites).
    if added:
        _save_jobs(jobs)
    logger.info("Registered self_evolution cron jobs: dream=1:00, propose=19:00")
def run_dream_job():
    """Execute the dream consolidation job.

    Called by the cron system at 1:00.  The DreamEngine resolves its own
    model/provider via the unified runtime config when built with no args.
    """
    from self_evolution.reflection_engine import DreamEngine

    outcome = DreamEngine().run(hours=24, max_runtime_seconds=6 * 3600)
    if not outcome:
        logger.info("Dream consolidation: no data to analyze")
    else:
        logger.info("Dream consolidation complete: score=%.3f, proposals generated", outcome.avg_score)
def run_propose_job():
    """Execute the proposal push job (19:00 cron): push today's proposals to Feishu."""
    from self_evolution.feishu_notifier import FeishuNotifier

    FeishuNotifier().send_daily_report()
def _load_jobs() -> list:
    """Read the cron job list from disk; a missing or corrupt file yields []."""
    if not CRON_FILE.exists():
        return []
    try:
        raw = CRON_FILE.read_text(encoding="utf-8")
        return json.loads(raw)
    except (OSError, json.JSONDecodeError):
        return []
def _save_jobs(jobs: list):
    """Persist the cron job list as pretty-printed UTF-8 JSON."""
    payload = json.dumps(jobs, ensure_ascii=False, indent=2)
    CRON_FILE.write_text(payload, encoding="utf-8")

296
self_evolution/db.py Normal file
View file

@ -0,0 +1,296 @@
"""
Self Evolution Plugin Independent SQLite Database
=====================================================
Independent from state.db to avoid upstream schema conflicts.
"""
from __future__ import annotations
import json
import logging
import sqlite3
import threading
import time
from pathlib import Path
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
from self_evolution.paths import DATA_DIR as DB_DIR, DB_PATH
SCHEMA_VERSION = 1
VALID_TABLES = frozenset({
"tool_invocations", "session_scores", "outcome_signals",
"reflection_reports", "evolution_proposals", "improvement_units",
"strategy_versions", "_meta",
})
def _validate_table(table: str) -> None:
    """Reject table names outside the known schema (guards f-string SQL)."""
    if table in VALID_TABLES:
        return
    raise ValueError(f"Invalid table name: {table!r}")
SCHEMA = """
-- Tool invocation telemetry
CREATE TABLE IF NOT EXISTS tool_invocations (
id INTEGER PRIMARY KEY AUTOINCREMENT,
session_id TEXT NOT NULL,
tool_name TEXT NOT NULL,
duration_ms INTEGER,
success BOOLEAN NOT NULL,
error_type TEXT,
turn_number INTEGER,
created_at REAL NOT NULL DEFAULT (strftime('%s','now'))
);
-- Session quality scores
CREATE TABLE IF NOT EXISTS session_scores (
session_id TEXT PRIMARY KEY,
composite_score REAL,
completion_rate REAL,
efficiency_score REAL,
cost_efficiency REAL,
satisfaction_proxy REAL,
task_category TEXT,
model TEXT,
created_at REAL NOT NULL DEFAULT (strftime('%s','now'))
);
-- Outcome signals
CREATE TABLE IF NOT EXISTS outcome_signals (
id INTEGER PRIMARY KEY AUTOINCREMENT,
session_id TEXT NOT NULL,
signal_type TEXT NOT NULL,
signal_value REAL,
metadata TEXT,
created_at REAL NOT NULL DEFAULT (strftime('%s','now'))
);
-- Reflection reports
CREATE TABLE IF NOT EXISTS reflection_reports (
id INTEGER PRIMARY KEY AUTOINCREMENT,
period_start REAL,
period_end REAL,
sessions_analyzed INTEGER,
avg_score REAL,
error_summary TEXT DEFAULT '',
waste_summary TEXT DEFAULT '',
code_change_summary TEXT DEFAULT '',
worst_patterns TEXT DEFAULT '[]',
best_patterns TEXT DEFAULT '[]',
tool_insights TEXT DEFAULT '{}',
recommendations TEXT DEFAULT '[]',
model_used TEXT DEFAULT '',
created_at REAL NOT NULL DEFAULT (strftime('%s','now'))
);
-- Evolution proposals
CREATE TABLE IF NOT EXISTS evolution_proposals (
id TEXT PRIMARY KEY,
report_id INTEGER REFERENCES reflection_reports(id),
proposal_type TEXT NOT NULL,
title TEXT NOT NULL,
description TEXT NOT NULL,
expected_impact TEXT DEFAULT '',
risk_assessment TEXT DEFAULT 'low',
rollback_plan TEXT DEFAULT '',
status TEXT NOT NULL DEFAULT 'pending_approval',
user_feedback TEXT DEFAULT '',
created_at REAL NOT NULL DEFAULT (strftime('%s','now')),
resolved_at REAL
);
-- Improvement unit tracking (A/B testing)
CREATE TABLE IF NOT EXISTS improvement_units (
id TEXT PRIMARY KEY,
proposal_id TEXT REFERENCES evolution_proposals(id),
change_type TEXT NOT NULL,
version INTEGER DEFAULT 0,
baseline_score REAL DEFAULT 0.0,
current_score REAL DEFAULT 0.0,
sessions_sampled INTEGER DEFAULT 0,
min_sessions INTEGER DEFAULT 10,
min_improvement REAL DEFAULT 0.05,
max_regression REAL DEFAULT 0.10,
status TEXT NOT NULL DEFAULT 'active',
created_at REAL NOT NULL DEFAULT (strftime('%s','now')),
resolved_at REAL
);
-- Strategy version history
CREATE TABLE IF NOT EXISTS strategy_versions (
id INTEGER PRIMARY KEY AUTOINCREMENT,
version INTEGER NOT NULL,
strategies_json TEXT NOT NULL,
avg_score REAL,
active_from REAL NOT NULL,
active_until REAL
);
-- Schema version tracking
CREATE TABLE IF NOT EXISTS _meta (
key TEXT PRIMARY KEY,
value TEXT NOT NULL
);
-- Indexes
CREATE INDEX IF NOT EXISTS idx_tool_invocations_session ON tool_invocations(session_id);
CREATE INDEX IF NOT EXISTS idx_tool_invocations_created ON tool_invocations(created_at);
CREATE INDEX IF NOT EXISTS idx_session_scores_created ON session_scores(created_at);
CREATE INDEX IF NOT EXISTS idx_outcome_signals_session ON outcome_signals(session_id);
CREATE INDEX IF NOT EXISTS idx_evolution_proposals_status ON evolution_proposals(status);
CREATE INDEX IF NOT EXISTS idx_improvement_units_status ON improvement_units(status);
"""
def _ensure_dir():
    """Create the database directory (and parents) if missing; no-op otherwise."""
    DB_DIR.mkdir(parents=True, exist_ok=True)
_local = threading.local()
def get_connection() -> sqlite3.Connection:
    """Return a thread-local cached connection (reused across calls).

    A cached connection is liveness-probed first; if the probe fails it
    is discarded and a fresh connection (WAL mode, foreign keys on,
    Row factory) is created.
    """
    cached = getattr(_local, "conn", None)
    if cached is not None:
        try:
            cached.execute("SELECT 1")  # liveness probe
        except sqlite3.Error:
            try:
                cached.close()
            except Exception:
                pass
        else:
            return cached
    _ensure_dir()
    fresh = sqlite3.connect(str(DB_PATH))
    fresh.row_factory = sqlite3.Row
    fresh.execute("PRAGMA journal_mode=WAL")
    fresh.execute("PRAGMA foreign_keys=ON")
    _local.conn = fresh
    return fresh
def close_connection():
    """Close the thread-local connection (for test cleanup / teardown)."""
    cached = getattr(_local, "conn", None)
    if cached is not None:
        try:
            cached.close()
        except Exception:
            pass
    _local.conn = None
def init_db():
    """Initialize the database: apply schema, stamp version, run migrations."""
    connection = get_connection()
    connection.executescript(SCHEMA)
    connection.execute(
        "INSERT OR REPLACE INTO _meta (key, value) VALUES (?, ?)",
        ("schema_version", str(SCHEMA_VERSION)),
    )
    connection.commit()
    logger.info("self_evolution database initialized at %s", DB_PATH)
    # Schema migration for pre-existing DBs: add code_change_summary if absent.
    try:
        connection.execute("ALTER TABLE reflection_reports ADD COLUMN code_change_summary TEXT DEFAULT ''")
        logger.info("Added code_change_summary column to reflection_reports")
    except sqlite3.OperationalError:
        # Column already exists — nothing to migrate.
        pass
    # Close after init so subsequent calls get a fresh connection with the new schema.
    close_connection()
# ── Generic CRUD ─────────────────────────────────────────────────────────
def insert(table: str, data: dict) -> int:
    """Insert a row into a table. Returns the rowid."""
    _validate_table(table)
    connection = get_connection()
    columns = list(data)
    stmt = "INSERT INTO {} ({}) VALUES ({})".format(
        table, ", ".join(columns), ", ".join(["?"] * len(columns))
    )
    cursor = connection.execute(stmt, [data[c] for c in columns])
    connection.commit()
    return cursor.lastrowid
def insert_many(table: str, rows: List[dict]):
    """Bulk-insert rows; column order is taken from the first row."""
    _validate_table(table)
    if not rows:
        return
    connection = get_connection()
    columns = list(rows[0])
    stmt = "INSERT INTO {} ({}) VALUES ({})".format(
        table, ", ".join(columns), ", ".join(["?"] * len(columns))
    )
    connection.executemany(stmt, ([row.get(c) for c in columns] for row in rows))
    connection.commit()
def update(table: str, data: dict, where: str, where_params: tuple = ()):
    """Update rows matching the (caller-supplied) where clause."""
    _validate_table(table)
    assignments = ", ".join(f"{col} = ?" for col in data)
    stmt = f"UPDATE {table} SET {assignments} WHERE {where}"
    connection = get_connection()
    connection.execute(stmt, [*data.values(), *where_params])
    connection.commit()
def fetch_one(table: str, where: str = "", params: tuple = ()) -> Optional[Dict[str, Any]]:
    """Fetch a single row as a dict, or None when nothing matches."""
    _validate_table(table)
    stmt = f"SELECT * FROM {table}"
    if where:
        stmt = f"{stmt} WHERE {where}"
    stmt = f"{stmt} LIMIT 1"
    row = get_connection().execute(stmt, params).fetchone()
    return None if row is None else dict(row)
def fetch_all(table: str, where: str = "", params: tuple = (),
              order_by: str = "", limit: int = 0) -> List[Dict[str, Any]]:
    """Fetch all matching rows as a list of dicts."""
    _validate_table(table)
    parts = [f"SELECT * FROM {table}"]
    if where:
        parts.append(f"WHERE {where}")
    if order_by:
        parts.append(f"ORDER BY {order_by}")
    if limit:
        parts.append(f"LIMIT {int(limit)}")
    cursor = get_connection().execute(" ".join(parts), params)
    return [dict(r) for r in cursor.fetchall()]
def query(sql: str, params: tuple = ()) -> List[Dict[str, Any]]:
    """Run a raw SELECT and return the rows as dicts."""
    fetched = get_connection().execute(sql, params).fetchall()
    return list(map(dict, fetched))
def execute(sql: str, params: tuple = ()):
    """Run a raw statement and commit immediately."""
    connection = get_connection()
    connection.execute(sql, params)
    connection.commit()
def cleanup(days: int = 30):
    """Purge telemetry (tool_invocations, outcome_signals) older than *days*."""
    threshold = time.time() - days * 86400
    connection = get_connection()
    for tbl in ("tool_invocations", "outcome_signals"):
        connection.execute(f"DELETE FROM {tbl} WHERE created_at < ?", (threshold,))
    connection.commit()
    logger.info("Cleaned up data older than %d days", days)

View file

@ -0,0 +1,325 @@
"""
Self Evolution Plugin Evolution Executor
============================================
Executes approved evolution proposals with rollback support.
Design reference: Claude Code plugins/ralph-wiggum/
- Self-referential feedback loop: execute verify rollback if needed
- Each change has a "completion promise" (verification criteria)
- Iteration > Perfection
"""
from __future__ import annotations
import json
import logging
import time
import uuid
from pathlib import Path
from typing import Optional
from self_evolution import db
from self_evolution.models import Proposal, ImprovementUnit
logger = logging.getLogger(__name__)
from self_evolution.paths import DATA_DIR as STRATEGIES_DIR, STRATEGIES_FILE, ARCHIVE_DIR
from self_evolution.paths import SKILLS_DIR, MEMORIES_DIR
class EvolutionExecutor:
"""Execute approved evolution proposals.
Supported proposal types:
- skill: create a new skill via skill_manager_tool
- strategy: update strategy rules
- memory: update PERFORMANCE.md via memory_tool
- tool_preference: update tool preference config
"""
def execute(self, proposal: Proposal):
"""Execute an approved proposal."""
logger.info("Executing proposal: %s (%s)", proposal.id, proposal.proposal_type)
try:
match proposal.proposal_type:
case "skill":
self._create_skill(proposal)
case "strategy":
self._update_strategy(proposal)
case "memory":
self._update_memory(proposal)
case "tool_preference":
self._update_tool_preference(proposal)
case "code_improvement":
self._save_optimization_request(proposal)
# Mark as executed
db.update(
"evolution_proposals",
{"status": "executed", "resolved_at": time.time()},
where="id = ?",
where_params=(proposal.id,),
)
# Create improvement tracking unit
self._create_tracking_unit(proposal)
logger.info("Proposal %s executed successfully", proposal.id)
except Exception as exc:
logger.exception("Failed to execute proposal %s: %s", proposal.id, exc)
db.update(
"evolution_proposals",
{"status": "execution_failed", "resolved_at": time.time()},
where="id = ?",
where_params=(proposal.id,),
)
def check_and_rollback(self):
"""Check active improvement units and rollback if needed.
Called during dream consolidation to verify previous changes.
"""
units = db.fetch_all("improvement_units", where="status = 'active'")
for unit_data in units:
unit = ImprovementUnit(
id=unit_data["id"],
proposal_id=unit_data["proposal_id"],
change_type=unit_data["change_type"],
version=unit_data.get("version", 0),
baseline_score=unit_data.get("baseline_score", 0),
current_score=unit_data.get("current_score", 0),
sessions_sampled=unit_data.get("sessions_sampled", 0),
min_sessions=unit_data.get("min_sessions", 10),
min_improvement=unit_data.get("min_improvement", 0.05),
max_regression=unit_data.get("max_regression", 0.10),
)
# Update current score from recent sessions
self._update_unit_score(unit)
if unit.should_revert:
self._revert(unit)
logger.warning("Rolled back improvement unit %s", unit.id)
elif unit.should_promote:
self._promote(unit)
logger.info("Promoted improvement unit %s", unit.id)
# ── Proposal Type Handlers ────────────────────────────────────────────
def _create_skill(self, proposal: Proposal):
"""Create a new skill via the skill_manager_tool."""
from self_evolution.strategy_store import StrategyStore
store = StrategyStore()
skill_dir = SKILLS_DIR / proposal.id
skill_dir.mkdir(parents=True, exist_ok=True)
skill_content = (
f"---\n"
f"name: {proposal.id}\n"
f"description: {proposal.title}\n"
f"---\n\n"
f"{proposal.description}\n"
)
(skill_dir / "SKILL.md").write_text(skill_content, encoding="utf-8")
logger.info("Created learned skill: %s", skill_dir)
def _update_strategy(self, proposal: Proposal):
"""Update strategy rules file with version tracking."""
from self_evolution.strategy_store import StrategyStore
store = StrategyStore()
current = store.load()
# Check for duplicate strategies by title similarity
rules = current.get("rules", [])
existing_titles = {r.get("name", "").strip().lower() for r in rules}
if proposal.title.strip().lower() in existing_titles:
logger.warning("Skipping duplicate strategy: %s", proposal.title)
return
# Archive current version
version = current.get("version", 0) + 1
store.archive(version - 1)
# Parse new strategy from proposal description
new_strategy = {
"id": proposal.id,
"name": proposal.title,
"type": "learned",
"description": proposal.description,
"hint_text": proposal.description,
"conditions": [],
"severity": "medium",
"created_at": time.time(),
}
# Add to strategies
rules.append(new_strategy)
current["rules"] = rules
current["version"] = version
store.save(current)
logger.info("Updated strategies to version %d", version)
# Invalidate injector cache so new strategy takes effect immediately
from self_evolution.strategy_injector import invalidate_cache
invalidate_cache()
def _update_memory(self, proposal: Proposal):
"""Update PERFORMANCE.md via the memory system."""
perf_path = MEMORIES_DIR / "PERFORMANCE.md"
perf_path.parent.mkdir(parents=True, exist_ok=True)
existing = ""
if perf_path.exists():
existing = perf_path.read_text(encoding="utf-8")
# Append new entry
timestamp = time.strftime("%Y-%m-%d %H:%M", time.localtime())
entry = f"\n## [{timestamp}] 自动学习\n{proposal.description}\n"
# Keep file under reasonable size (last 50 entries)
entries = (existing + entry).split("\n## ")
if len(entries) > 50:
entries = entries[-50:]
perf_path.write_text("\n## ".join(entries), encoding="utf-8")
logger.info("Updated PERFORMANCE.md")
def _update_tool_preference(self, proposal: Proposal):
"""Update tool preference config."""
prefs_path = STRATEGIES_DIR / "tool_preferences.json"
prefs = {}
if prefs_path.exists():
prefs = json.loads(prefs_path.read_text(encoding="utf-8"))
prefs[proposal.id] = {
"description": proposal.description,
"expected_impact": proposal.expected_impact,
"created_at": time.time(),
}
prefs_path.write_text(
json.dumps(prefs, ensure_ascii=False, indent=2),
encoding="utf-8",
)
logger.info("Updated tool preferences: %s", proposal.id)
# ── Tracking & Verification ───────────────────────────────────────────
def _create_tracking_unit(self, proposal: Proposal):
"""Create an improvement tracking unit after execution.
Inspired by Ralph Wiggum's completion_promise pattern.
"""
# Get baseline score from recent sessions
recent = db.fetch_all(
"session_scores",
order_by="created_at DESC",
limit=10,
)
baseline = (
sum(s.get("composite_score", 0) for s in recent) / len(recent)
if recent else 0
)
unit = ImprovementUnit(
id=f"unit-{uuid.uuid4().hex[:8]}",
proposal_id=proposal.id,
change_type=proposal.proposal_type,
baseline_score=baseline,
min_sessions=10,
min_improvement=0.05,
max_regression=0.10,
)
db.insert("improvement_units", unit.to_db_row())
logger.info("Created tracking unit: %s (baseline=%.3f)", unit.id, baseline)
def _update_unit_score(self, unit: ImprovementUnit):
"""Update the current score for an improvement unit."""
# Count sessions since this unit was created
unit_data = db.fetch_one("improvement_units", where="id = ?", params=(unit.id,))
if not unit_data:
return
created_at = unit_data.get("created_at", 0)
recent = db.fetch_all(
"session_scores",
where="created_at >= ?",
params=(created_at,),
order_by="created_at DESC",
)
if recent:
current_score = sum(s.get("composite_score", 0) for s in recent) / len(recent)
sessions_sampled = len(recent)
db.update(
"improvement_units",
{
"current_score": current_score,
"sessions_sampled": sessions_sampled,
},
where="id = ?",
where_params=(unit.id,),
)
unit.current_score = current_score
unit.sessions_sampled = sessions_sampled
def _revert(self, unit: ImprovementUnit):
"""Revert a change by restoring the previous version."""
from self_evolution.strategy_store import StrategyStore
store = StrategyStore()
if unit.version > 0:
old = store.load_archive(unit.version - 1)
if old:
store.save(old)
db.update(
"improvement_units",
{"status": "reverted", "resolved_at": time.time()},
where="id = ?",
where_params=(unit.id,),
)
def _promote(self, unit: ImprovementUnit):
"""Promote an improvement unit from active to permanent."""
db.update(
"improvement_units",
{"status": "promoted", "resolved_at": time.time()},
where="id = ?",
where_params=(unit.id,),
)
# ── Code Improvement (save request document) ────────────────────────────
def _save_optimization_request(self, proposal: Proposal):
"""Save a code improvement request as a document.
Does NOT auto-modify code. The user reviews the request and decides
whether to implement changes manually or via Claude Code.
"""
req_dir = DATA_DIR / "optimization_requests"
req_dir.mkdir(parents=True, exist_ok=True)
doc_path = req_dir / f"{proposal.id}.md"
doc_content = (
f"# 程序优化需求\n\n"
f"**标题**: {proposal.title}\n"
f"**预期影响**: {proposal.expected_impact}\n"
f"**风险评估**: {proposal.risk_assessment}\n"
f"**回滚方案**: {proposal.rollback_plan}\n"
f"**创建时间**: {time.strftime('%Y-%m-%d %H:%M', time.localtime())}\n\n"
f"---\n\n"
f"{proposal.description}\n"
)
doc_path.write_text(doc_content, encoding="utf-8")
logger.info("Saved optimization request: %s", doc_path)

View file

@ -0,0 +1,229 @@
"""
Self Evolution Plugin Evolution Proposer
===========================================
Converts reflection insights into concrete, actionable evolution proposals.
Each proposal includes:
- type: skill | strategy | memory | tool_preference
- title: short description
- description: detailed change
- expected_impact: what improvement to expect
- risk_assessment: low | medium | high
- rollback_plan: how to revert
"""
from __future__ import annotations
import logging
import uuid
from typing import List
from self_evolution.models import Proposal, ReflectionReport
logger = logging.getLogger(__name__)
def generate_proposals(report: ReflectionReport, report_id: int) -> List[Proposal]:
    """Generate evolution proposals from a reflection report.

    Prioritizes proposals by:
    1. Impact (fixes for systemic errors > optimizations > enhancements)
    2. Risk (low risk first)
    3. Feasibility (clear rollback plan)
    """
    collected: List[Proposal] = []
    # 1. Error patterns → structured code-improvement requests.
    for idx, pattern in enumerate(report.worst_patterns):
        candidate = _pattern_to_code_improvement(pattern, report, report_id, idx)
        if candidate:
            collected.append(candidate)
    # 2. Success patterns → skill proposals (gated on repeated successes).
    for idx, pattern in enumerate(report.best_patterns):
        candidate = _success_to_proposal(pattern, report, report_id, idx)
        if candidate:
            collected.append(candidate)
    # 3. Free-form recommendations → typed proposals.
    for idx, rec in enumerate(report.recommendations):
        candidate = _recommendation_to_proposal(rec, report, report_id, idx)
        if candidate:
            collected.append(candidate)
    # Drop near-duplicate titles, then cap at 5 proposals per day.
    return _deduplicate(collected)[:5]
def _pattern_to_code_improvement(
    pattern: str, report: ReflectionReport, report_id: int, index: int
) -> Proposal:
    """Convert an error pattern into a structured code optimization request."""
    # Pull supporting metrics out of the report (None-safe).
    errors = report.error_summary or ""
    session_count = report.sessions_analyzed or 0
    mean_score = report.avg_score or 0
    short_pattern = pattern[:60]
    # Assemble the structured optimization document body.
    description = (
        f"## 问题描述\n"
        f"{short_pattern}\n\n"
        f"## 数据支撑\n"
        f"- 分析会话数: {session_count}\n"
        f"- 平均质量分: {mean_score:.3f}\n"
        f"- 错误摘要: {errors[:200]}\n\n"
        f"## 建议方向\n"
        f"分析此错误模式的根因,考虑通过程序化手段(如工具调用前置校验、"
        f"自动降级策略、路径预检等)来规避,而非仅靠提示词提醒。\n\n"
        f"## 备注\n"
        f"此为程序优化需求,审批后将保存为需求文档,需手动实施代码修改。"
    )
    return Proposal(
        id=f"prop-opt-{uuid.uuid4().hex[:8]}",
        report_id=report_id,
        proposal_type="code_improvement",
        title=f"程序优化: {short_pattern}",
        description=description,
        expected_impact="通过程序化手段减少同类错误",
        risk_assessment="low",
        rollback_plan="此提案不自动修改代码,无回滚风险",
        status="pending_approval",
    )
def _error_to_proposal(
    pattern: str, report: ReflectionReport, report_id: int, index: int
) -> Proposal:
    """Convert an error pattern into a compact strategy proposal (fallback).

    Not used by the primary pipeline (which prefers code_improvement
    proposals); kept as a lighter-weight alternative.

    Fix: removed a dead local (``hint = _compress_hint(pattern)``) whose
    result was never used.
    """
    return Proposal(
        id=f"prop-error-{uuid.uuid4().hex[:8]}",
        report_id=report_id,
        proposal_type="strategy",
        title=f"规避模式: {pattern[:50]}",
        description=f"基于错误分析发现的问题模式: {pattern}\n\n"
                    f"建议创建策略规则来规避此类问题。",
        expected_impact="减少同类错误发生率",
        risk_assessment="low",
        rollback_plan="删除策略规则即可恢复",
        status="pending_approval",
    )
def _success_to_proposal(
    pattern: str, report: ReflectionReport, report_id: int, index: int
) -> Proposal | None:
    """Convert a success pattern into a skill-creation proposal.

    Returns None unless the pattern has at least 5 successful sessions,
    to avoid codifying one-off wins.

    Fix: return annotation corrected to ``Proposal | None`` — the original
    claimed ``Proposal`` but returns None below the threshold.
    """
    success_count = _count_successful_sessions(pattern, report)
    if success_count < 5:
        logger.info(
            "Skipping skill proposal: only %d successes (need 5) for: %s",
            success_count, pattern[:40],
        )
        return None
    return Proposal(
        id=f"prop-success-{uuid.uuid4().hex[:8]}",
        report_id=report_id,
        proposal_type="skill",
        title=f"固化成功模式: {pattern[:50]}",
        description=f"基于成功分析发现的高效模式: {pattern}\n\n"
                    f"已验证 {success_count} 次成功执行。\n\n"
                    f"建议创建可复用的技能来固化此模式。",
        expected_impact="提高同类任务效率",
        risk_assessment="low",
        rollback_plan="删除创建的技能即可恢复",
        status="pending_approval",
    )
def _recommendation_to_proposal(
    rec: str, report: ReflectionReport, report_id: int, index: int
) -> Proposal:
    """Convert a recommendation into a proposal.

    The proposal type is inferred from keywords in the text; first match
    wins, defaulting to "strategy".
    """
    keyword_types = (
        (("记忆", "记忆更新", "memory", "记住"), "memory"),
        (("技能", "skill", "创建"), "skill"),
        (("工具", "tool", "偏好"), "tool_preference"),
    )
    proposal_type = "strategy"
    for keywords, inferred in keyword_types:
        if any(kw in rec for kw in keywords):
            proposal_type = inferred
            break
    return Proposal(
        id=f"prop-rec-{uuid.uuid4().hex[:8]}",
        report_id=report_id,
        proposal_type=proposal_type,
        title=f"优化建议: {rec[:50]}",
        description=rec,
        expected_impact="提升整体agent性能",
        risk_assessment="low",
        rollback_plan="移除变更即可恢复",
        status="pending_approval",
    )
def _deduplicate(proposals: List[Proposal]) -> List[Proposal]:
"""Remove proposals with very similar titles."""
seen_titles = set()
unique = []
for p in proposals:
# Normalize title for comparison
normalized = p.title.lower().strip()[:30]
if normalized not in seen_titles:
seen_titles.add(normalized)
unique.append(p)
return unique
def _count_successful_sessions(pattern: str, report: ReflectionReport) -> int:
    """Count recent high-scoring sessions as a success proxy for *pattern*.

    Counts up to the 100 most recent sessions with composite_score >= 0.7.
    NOTE(review): despite the parameter, no keyword matching against
    *pattern* is actually performed — every high-scoring session is
    counted, so this over-estimates pattern-specific successes.
    Falls back to ``report.sessions_analyzed`` if the DB query fails.
    """
    try:
        from self_evolution import db
        # TODO: filter by category keywords extracted from *pattern*
        scores = db.fetch_all(
            "session_scores",
            where="composite_score >= ?",
            params=(0.7,),
            order_by="created_at DESC",
            limit=100,
        )
        return len(scores)
    except Exception:
        # Fallback: use sessions_analyzed from report as estimate
        return report.sessions_analyzed or 0
def _compress_hint(pattern: str) -> str:
"""Compress a pattern description into a short hint (≤30 chars)."""
# Keyword-based compression
mappings = [
(["bash", "路径", "path", "预检"], "bash前先read验证路径"),
(["api", "调试", "降级"], "API失败时降级只读探查"),
(["browser", "超时", "timeout"], "浏览器操作设超时保护"),
(["重试", "retry", "重复"], "避免重复重试相同操作"),
(["工具", "tool", "失败"], "工具失败时切换备选方案"),
]
text = pattern.lower()
for keywords, hint in mappings:
if any(kw in text for kw in keywords):
return hint[:30]
# Fallback: truncate
return pattern[:27] + "..." if len(pattern) > 30 else pattern

View file

@ -0,0 +1,490 @@
"""
Self Evolution Plugin Feishu Notifier
========================================
Pushes evolution proposals to Feishu at 19:00 daily.
Uses interactive card messages with action buttons for approval.
Receives callbacks when user clicks: approve / modify / reject.
"""
from __future__ import annotations
import json
import logging
import os
import time
from typing import Any, Dict, List, Optional
from self_evolution import db
from self_evolution.models import Proposal
logger = logging.getLogger(__name__)
class FeishuNotifier:
"""Send evolution proposals via Feishu interactive cards."""
    def __init__(self):
        """Load Feishu app credentials from the environment.

        The notifier is disabled (all sends become no-ops) unless both
        FEISHU_APP_ID and FEISHU_APP_SECRET are set.
        """
        self.app_id = os.getenv("FEISHU_APP_ID", "")
        self.app_secret = os.getenv("FEISHU_APP_SECRET", "")
        self.enabled = bool(self.app_id and self.app_secret)
        # NOTE(review): _client is never referenced in the visible code;
        # presumably a lazily created SDK client — confirm before removing.
        self._client = None
        self._token_cache: Optional[tuple[str, float]] = None  # (token, expire_at)
def send_daily_report(self):
"""Send pending proposals as a daily Feishu card message.
Called by the 19:00 cron job.
"""
if not self.enabled:
logger.info("Feishu not configured, skipping notification")
return
# Load pending proposals
proposals = db.fetch_all(
"evolution_proposals",
where="status = ?",
params=("pending_approval",),
order_by="created_at DESC",
)
if not proposals:
logger.info("No pending proposals to send")
return
# Load latest reflection report for context
reports = db.fetch_all(
"reflection_reports",
order_by="created_at DESC",
limit=1,
)
report = reports[0] if reports else {}
# Build card
card = self._build_card(proposals, report)
# Send
self._send_card(card)
logger.info("Sent %d proposals via Feishu", len(proposals))
def handle_callback(self, action: str, proposal_id: str, user_input: str = ""):
"""Handle Feishu card button callback.
Args:
action: "approve" | "modify" | "reject"
proposal_id: The proposal ID
user_input: Optional user modification text
Returns:
dict with 'feedback' (str) and 'updated_card' (dict or None).
"""
result = {"feedback": "", "updated_card": None}
if action == "approve":
logger.info("[TRACE] handle_callback: approving proposal %s", proposal_id)
title = self._approve(proposal_id)
result["feedback"] = f"✅ 已通过并执行: {title}"
logger.info("[TRACE] handle_callback: approved '%s'", title)
elif action == "modify":
title = self._modify(proposal_id, user_input)
result["feedback"] = f"✏️ 已修改: {title}"
elif action == "reject":
title = self._reject(proposal_id, user_input)
result["feedback"] = f"❌ 已拒绝: {title}"
# Build updated card with remaining pending proposals
logger.info("[TRACE] handle_callback: building updated card")
result["updated_card"] = self.build_updated_card()
logger.info("[TRACE] handle_callback: updated_card=%s", "present" if result["updated_card"] else "None (all done)")
return result
    def build_updated_card(self) -> Optional[dict]:
        """Build a card with remaining pending proposals.

        Returns None if no pending proposals remain (caller can show
        an 'all done' card instead).
        """
        pending = db.fetch_all(
            "evolution_proposals",
            where="status = ?",
            params=("pending_approval",),
            order_by="created_at DESC",
        )
        if not pending:
            return None
        # Load latest report for context
        # NOTE(review): *report* is loaded but never used below — confirm intent.
        reports = db.fetch_all("reflection_reports", order_by="created_at DESC", limit=1)
        report = reports[0] if reports else {}
        date_str = time.strftime("%Y-%m-%d", time.localtime())
        elements = []
        # Status bar
        elements.append({
            "tag": "div",
            "text": {"tag": "lark_md", "content": f"**待审批**: {len(pending)} 个提案"},
        })
        elements.append({"tag": "hr"})
        # Proposals: one markdown section + one button row per proposal.
        for i, p in enumerate(pending):
            # NOTE(review): "strategy" maps to an empty string — possibly a
            # lost emoji in the source encoding; confirm.
            type_emoji = {"skill": "🛠️", "strategy": "", "memory": "🧠", "tool_preference": "🔧", "code_improvement": "🏗️"}
            emoji = type_emoji.get(p.get("proposal_type", ""), "📋")
            proposal_text = (
                f"**[{emoji}] {p.get('title', f'提案 {i+1}')}**\n"
                f"{p.get('description', '')[:200]}\n"
                f"预期影响: {p.get('expected_impact', 'N/A')} | "
                f"风险: {p.get('risk_assessment', 'low')}\n"
            )
            elements.append({
                "tag": "div",
                "text": {"tag": "lark_md", "content": proposal_text},
            })
            # Action buttons
            elements.append({
                "tag": "action",
                "actions": [
                    {
                        "tag": "button",
                        "text": {"tag": "plain_text", "content": "通过"},
                        "type": "primary",
                        "value": {"action": "approve", "proposal_id": p["id"]},
                    },
                    {
                        "tag": "button",
                        "text": {"tag": "plain_text", "content": "修改"},
                        "type": "default",
                        "value": {"action": "modify", "proposal_id": p["id"]},
                    },
                    {
                        "tag": "button",
                        "text": {"tag": "plain_text", "content": "拒绝"},
                        "type": "danger",
                        "value": {"action": "reject", "proposal_id": p["id"]},
                    },
                ],
            })
        return {
            "header": {
                "title": {"tag": "plain_text", "content": f"Hermes 进化报告 ({date_str})"},
                "template": "blue",
            },
            "elements": elements,
        }
def send_rollback_notification(self, unit_id: str, reason: str):
"""Notify user that an improvement unit was auto-rolled back."""
if not self.enabled:
return
card = {
"elements": [
{
"tag": "div",
"text": {
"tag": "lark_md",
"content": f"**自动回滚通知**\n\n"
f"改进单元 `{unit_id}` 已自动回滚。\n"
f"原因: {reason}",
},
},
],
}
self._send_card(card)
# ── Internal Methods ──────────────────────────────────────────────────
def _approve(self, proposal_id: str) -> str:
"""Mark proposal as approved and trigger execution. Returns title."""
row = db.fetch_one("evolution_proposals", where="id = ?", params=(proposal_id,))
title = row.get("title", proposal_id) if row else proposal_id
db.update(
"evolution_proposals",
{"status": "approved", "resolved_at": time.time()},
where="id = ?",
where_params=(proposal_id,),
)
# Trigger execution
if row:
from self_evolution.evolution_executor import EvolutionExecutor
executor = EvolutionExecutor()
proposal = Proposal(
id=row["id"],
proposal_type=row["proposal_type"],
title=row["title"],
description=row["description"],
expected_impact=row.get("expected_impact", ""),
risk_assessment=row.get("risk_assessment", "low"),
rollback_plan=row.get("rollback_plan", ""),
status="approved",
)
executor.execute(proposal)
return title
def _modify(self, proposal_id: str, user_input: str) -> str:
    """Attach the user's requested modification to a proposal and put it
    back into the approval queue. Returns the proposal title."""
    record = db.fetch_one("evolution_proposals", where="id = ?", params=(proposal_id,))
    db.update(
        "evolution_proposals",
        {"user_feedback": user_input, "status": "pending_approval"},
        where="id = ?",
        where_params=(proposal_id,),
    )
    return record.get("title", proposal_id) if record else proposal_id
def _reject(self, proposal_id: str, user_input: str) -> str:
    """Mark a proposal as rejected, store the user's reason, and emit an
    outcome signal so the dream engine can learn from the rejection.
    Returns the proposal title."""
    record = db.fetch_one("evolution_proposals", where="id = ?", params=(proposal_id,))
    resolved_title = record.get("title", proposal_id) if record else proposal_id
    db.update(
        "evolution_proposals",
        {"status": "rejected", "user_feedback": user_input, "resolved_at": time.time()},
        where="id = ?",
        where_params=(proposal_id,),
    )
    # Feed the rejection back into the learning loop as a zero-value signal.
    db.insert("outcome_signals", {
        "session_id": f"evolution_rejection_{proposal_id}",
        "signal_type": "proposal_rejected",
        "signal_value": 0.0,
        "metadata": json.dumps({"proposal_id": proposal_id, "reason": user_input}, ensure_ascii=False),
    })
    return resolved_title
def _build_card(self, proposals: List[dict], report: dict) -> dict:
    """Build Feishu interactive card JSON for the daily evolution report.

    Layout: overview stats, optional error / waste / code-change sections,
    a separator, then one text element plus one row of approve / modify /
    reject buttons per proposal.

    Args:
        proposals: proposal rows; each must carry an "id" key (the button
            values KeyError without it) plus optional display fields.
        report: reflection-report fields (sessions_analyzed, avg_score,
            *_summary strings).
    """
    # Header
    date_str = time.strftime("%Y-%m-%d", time.localtime())
    elements = []
    # Overview section
    sessions_analyzed = report.get("sessions_analyzed", 0)
    avg_score = report.get("avg_score", 0)
    overview = (
        f"**日期**: {date_str}\n"
        f"**分析Sessions**: {sessions_analyzed}\n"
        f"**平均评分**: {avg_score:.3f}\n"
    )
    elements.append({
        "tag": "div",
        "text": {"tag": "lark_md", "content": overview},
    })
    # Error summary (omitted entirely when empty)
    error_summary = report.get("error_summary", "")
    if error_summary:
        elements.append({
            "tag": "div",
            "text": {"tag": "lark_md", "content": f"**错误分析**\n{error_summary}"},
        })
    # Waste summary
    waste_summary = report.get("waste_summary", "")
    if waste_summary:
        elements.append({
            "tag": "div",
            "text": {"tag": "lark_md", "content": f"**时间浪费分析**\n{waste_summary}"},
        })
    # Code change summary
    code_change_summary = report.get("code_change_summary", "")
    if code_change_summary:
        elements.append({
            "tag": "div",
            "text": {"tag": "lark_md", "content": f"**系统代码更新**\n{code_change_summary}"},
        })
    # Separator
    elements.append({"tag": "hr"})
    # Proposals
    for i, p in enumerate(proposals):
        # NOTE(review): "strategy" maps to an empty string — looks like a
        # lost emoji; confirm the intended rendering.
        type_emoji = {"skill": "🛠️", "strategy": "", "memory": "🧠", "tool_preference": "🔧", "code_improvement": "🏗️"}
        emoji = type_emoji.get(p.get("proposal_type", ""), "📋")
        proposal_text = (
            f"**[{emoji}] {p.get('title', f'提案 {i+1}')}**\n"
            f"{p.get('description', '')[:200]}\n"
            f"预期影响: {p.get('expected_impact', 'N/A')} | "
            f"风险: {p.get('risk_assessment', 'low')}\n"
        )
        elements.append({
            "tag": "div",
            "text": {"tag": "lark_md", "content": proposal_text},
        })
        # Action buttons — action/proposal_id round-trip via the card callback
        elements.append({
            "tag": "action",
            "actions": [
                {
                    "tag": "button",
                    "text": {"tag": "plain_text", "content": "通过"},
                    "type": "primary",
                    "value": {"action": "approve", "proposal_id": p["id"]},
                },
                {
                    "tag": "button",
                    "text": {"tag": "plain_text", "content": "修改"},
                    "type": "default",
                    "value": {"action": "modify", "proposal_id": p["id"]},
                },
                {
                    "tag": "button",
                    "text": {"tag": "plain_text", "content": "拒绝"},
                    "type": "danger",
                    "value": {"action": "reject", "proposal_id": p["id"]},
                },
            ],
        })
    return {
        "header": {
            "title": {"tag": "plain_text", "content": f"Hermes 每日进化报告 ({date_str})"},
            "template": "blue",
        },
        "elements": elements,
    }
def _get_client(self):
    """Lazily build and memoize a lark SDK Client for this notifier."""
    if self._client is not None:
        return self._client
    import lark_oapi as lark
    builder = lark.Client.builder()
    self._client = builder.app_id(self.app_id).app_secret(self.app_secret).build()
    return self._client
def _send_card(self, card: dict):
    """Send an interactive card via Feishu.

    Prefers the lark_oapi SDK (same as the gateway), falls back to REST
    when the SDK is not installed. All failures are logged, never raised.
    """
    try:
        receive_id, receive_id_type = self._resolve_target()
        if not receive_id:
            logger.warning("No Feishu receive target configured")
            return
        content_str = json.dumps(card, ensure_ascii=False)
        # Try SDK first (using cached client)
        try:
            from lark_oapi.api.im.v1 import CreateMessageRequest, CreateMessageRequestBody
            client = self._get_client()
            body = CreateMessageRequestBody.builder() \
                .receive_id(receive_id) \
                .msg_type("interactive") \
                .content(content_str) \
                .build()
            request = CreateMessageRequest.builder() \
                .receive_id_type(receive_id_type) \
                .request_body(body) \
                .build()
            response = client.im.v1.message.create(request)
            if response.success():
                logger.info("Feishu card sent via SDK")
                return
            # SDK reachable but API rejected the message: fall through to REST.
            logger.warning("Feishu SDK send failed: code=%s msg=%s", response.code, response.msg)
        except ImportError:
            pass  # SDK not installed — use the REST fallback below
        # Fallback to REST API
        self._send_card_rest(receive_id, receive_id_type, content_str)
    except Exception as exc:
        logger.warning("Feishu notification failed: %s", exc)
def _resolve_target(self) -> tuple:
"""Resolve (receive_id, receive_id_type) from env config."""
deliver_to = os.getenv("SELF_EVOLUTION_FEISHU_DELIVER", "user")
if deliver_to.startswith("chat:"):
return deliver_to.replace("chat:", ""), "chat_id"
user_id = os.getenv("SELF_EVOLUTION_FEISHU_USER_ID", "")
if not user_id:
return "", ""
if user_id.startswith("ou_"):
return user_id, "open_id"
if user_id.startswith("oc_"):
return user_id, "chat_id"
return user_id, "user_id"
def _send_card_rest(self, receive_id: str, receive_id_type: str, content: str):
    """Fallback: send card via REST API.

    NOTE(review): only the HTTP status is checked; Feishu can also report
    errors via a "code" field inside a 200 body — consider checking it.
    """
    import requests
    token = self._get_tenant_token()
    if not token:
        logger.warning("Failed to get Feishu token")
        return
    resp = requests.post(
        "https://open.feishu.cn/open-apis/im/v1/messages",
        headers={"Authorization": f"Bearer {token}"},
        params={"receive_id_type": receive_id_type},
        json={"receive_id": receive_id, "msg_type": "interactive", "content": content},
        timeout=30,
    )
    if resp.status_code != 200:
        logger.warning("Feishu REST send failed: %s", resp.text)
def _send_confirmation(self, proposal_id: str, message: str):
    """Send a one-line confirmation card for a proposal action.

    No-op when Feishu notification is disabled.
    """
    if not self.enabled:
        return
    text_element = {
        "tag": "div",
        "text": {
            "tag": "lark_md",
            "content": f"**提案 `{proposal_id}`**: {message}",
        },
    }
    self._send_card({"elements": [text_element]})
def _get_tenant_token(self) -> Optional[str]:
    """Get Feishu tenant access token with caching (1.5h TTL).

    Returns None on any failure; callers treat that as "cannot send".
    """
    # Serve from cache while the stored token is still fresh.
    if self._token_cache is not None:
        token, expire_at = self._token_cache
        if time.time() < expire_at:
            return token
    try:
        import requests
        resp = requests.post(
            "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal",
            json={
                "app_id": self.app_id,
                "app_secret": self.app_secret,
            },
            timeout=10,
        )
        if resp.status_code == 200:
            token = resp.json().get("tenant_access_token")
            if token:
                # Feishu tokens expire in ~2h; cache for 1.5h
                self._token_cache = (token, time.time() + 5400)
                return token
    except Exception as exc:
        logger.debug("Failed to get Feishu token: %s", exc)
    return None

View file

@ -0,0 +1,170 @@
"""
Self Evolution Plugin Git Analysis
=====================================
Analyzes git commit history for the dream consolidation engine.
Uses a single batched ``git log --stat --name-only`` call instead of
25+ individual subprocess invocations.
Extracted from reflection_engine.py for single-responsibility.
"""
from __future__ import annotations
import logging
import re
import subprocess
import time
from pathlib import Path
from typing import Dict
from self_evolution.models import CodeChangeAnalysis, CommitInfo
logger = logging.getLogger(__name__)
def analyze_code_changes(hours: int = 24) -> CodeChangeAnalysis:
    """Analyze git commits from the previous period.

    Uses a single batched git log call with --stat --name-only
    instead of 25+ individual subprocess calls.

    Args:
        hours: look-back window, converted into a --since cutoff.

    Returns:
        Aggregated CodeChangeAnalysis; an empty one when git is missing,
        the command fails, times out, or no commits are found.
    """
    # NOTE(review): assumes this module lives one directory below the repo
    # root — confirm if the plugin layout changes.
    project_root = str(Path(__file__).resolve().parent.parent)
    cutoff_epoch = time.time() - (hours * 3600)
    cutoff_date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(cutoff_epoch))
    try:
        # Single batched call: format + shortstat + name-only.
        # COMMITSTART / ENDHEADER are sentinels consumed by
        # _parse_batched_output; capped at 15 non-merge commits.
        result = subprocess.run(
            ["git", "log",
             "--format=COMMITSTART%h%n%s%n%an%n%at%n%b%nENDHEADER",
             "--shortstat", "--name-only",
             "--no-merges", f"--since={cutoff_date}", "-15"],
            capture_output=True, text=True, timeout=30,
            cwd=project_root,
        )
        if result.returncode != 0 or not result.stdout.strip():
            return CodeChangeAnalysis()
        commits = _parse_batched_output(result.stdout)
        if not commits:
            return CodeChangeAnalysis()
        # Aggregate stats
        total_ins = sum(c.insertions for c in commits)
        total_del = sum(c.deletions for c in commits)
        total_files = sum(c.files_changed for c in commits)
        # dict.fromkeys = order-preserving dedupe
        authors = list(dict.fromkeys(c.author for c in commits))
        # Categorize by conventional commit prefix
        categories: Dict[str, int] = {}
        for c in commits:
            cat = _categorize_commit(c.subject)
            categories[cat] = categories.get(cat, 0) + 1
        # Extract top-level module areas (first path segment, max 10)
        all_files = []
        for c in commits:
            all_files.extend(c.file_list)
        areas = list(dict.fromkeys(
            f.split("/")[0] for f in all_files
            if "/" in f and not f.startswith(".")
        ))[:10]
        return CodeChangeAnalysis(
            commits=commits,
            total_commits=len(commits),
            total_insertions=total_ins,
            total_deletions=total_del,
            total_files_changed=total_files,
            authors=authors,
            change_categories=categories,
            areas_touched=areas,
        )
    except (subprocess.SubprocessError, FileNotFoundError, OSError):
        logger.debug("git analysis unavailable", exc_info=True)
        return CodeChangeAnalysis()
def _parse_batched_output(stdout: str) -> list:
    """Parse the batched git log output into CommitInfo objects.

    Each commit chunk has the shape:
        COMMITSTART<hash>\\n<subject>\\n<author>\\n<epoch>\\n<body>\\nENDHEADER
        <shortstat line>
        <file paths, one per line>
    Malformed chunks are skipped rather than raising.
    """
    commits = []
    raw_commits = stdout.split("COMMITSTART")
    for raw in raw_commits:
        raw = raw.strip()
        if not raw:
            continue
        header_end = raw.find("ENDHEADER")
        if header_end < 0:
            continue
        header = raw[:header_end].strip()
        lines = header.split("\n")
        if len(lines) < 4:
            continue
        hash_short = lines[0].strip()
        subject = lines[1].strip()
        author = lines[2].strip()
        try:
            timestamp = float(lines[3].strip())
        except ValueError:
            continue
        # Anything after the fixed header lines is the commit body (capped).
        body = "\n".join(lines[4:]).strip()[:500]
        # After ENDHEADER: shortstat line(s) + file list
        rest = raw[header_end + len("ENDHEADER"):].strip()
        files_changed = 0
        insertions = 0
        deletions = 0
        file_list = []
        stat_done = False
        for rline in rest.split("\n"):
            rline = rline.strip()
            if not rline:
                continue
            # First line that looks like a shortstat summary is parsed once.
            if not stat_done and ("files changed" in rline or "file changed" in rline
                    or "insertion" in rline or "deletion" in rline):
                files_changed = _parse_int(r'(\d+) files? changed', rline)
                insertions = _parse_int(r'(\d+) insertion', rline)
                deletions = _parse_int(r'(\d+) deletion', rline)
                stat_done = True
                continue
            # Heuristic: treat anything with a slash or dot as a path.
            # NOTE(review): this can admit stray non-path lines.
            if "/" in rline or "." in rline:
                file_list.append(rline)
        commits.append(CommitInfo(
            hash_short=hash_short,
            subject=subject,
            body=body,
            author=author,
            timestamp=timestamp,
            files_changed=files_changed,
            insertions=insertions,
            deletions=deletions,
            file_list=file_list[:20],
        ))
    return commits
# ── Helpers ───────────────────────────────────────────────────────────────
def _parse_int(pattern: str, text: str) -> int:
"""Extract first integer matching regex pattern from text."""
m = re.search(pattern, text)
return int(m.group(1)) if m else 0
def _categorize_commit(subject: str) -> str:
"""Categorize commit by conventional commit prefix."""
s = subject.lower()
for prefix in ("feat", "fix", "refactor", "test", "docs", "chore", "perf", "style", "ci", "build"):
if s.startswith(prefix):
return prefix
return "other"

200
self_evolution/hooks.py Normal file
View file

@ -0,0 +1,200 @@
"""
Self Evolution Plugin Lifecycle Hooks
========================================
Registered hooks:
- post_tool_call: Collect per-tool telemetry
- on_session_end: Compute quality score + detect outcome signals
- pre_llm_call: Inject learned strategy hints
"""
from __future__ import annotations
import logging
import re
import time
from typing import Any, Dict, Optional
logger = logging.getLogger(__name__)
# ── Correction detection patterns (inspired by Claude Code conversation-analyzer) ──
# User pushback phrases; consumed as a boolean via .search() in
# _detect_outcome_signals.
# NOTE(review): bare English alternatives like "no"/"stop" also match inside
# words ("normal", "stopped"); add \b boundaries if false positives appear.
CORRECTION_PATTERNS = re.compile(
    r"(不对|错误|重试|不要|停|stop|wrong|retry|no|don't|not that|不是|不是这个|为什么|换一种)",
    re.IGNORECASE,
)
# Frustration phrases. Fixed: 浪费时间 was listed three times — duplicate
# alternatives never change what the regex matches, so this is a pure cleanup.
FRUSTRATION_PATTERNS = re.compile(
    r"(烦|慢|太慢|浪费时间|why did you|无语|算了|够了)",
    re.IGNORECASE,
)
# ── post_tool_call ───────────────────────────────────────────────────────
def on_tool_call(**kwargs) -> None:
    """Persist one row of per-tool telemetry into tool_invocations.

    Recognized kwargs (all optional, with defaults): tool_name,
    started_at, duration_ms, success, error_type, session_id,
    turn_number. Insert failures are logged and swallowed so telemetry
    can never break the tool pipeline.
    """
    from self_evolution.db import insert

    succeeded = kwargs.get("success", True)
    record = {
        "session_id": kwargs.get("session_id", ""),
        "tool_name": kwargs.get("tool_name", "unknown"),
        "duration_ms": kwargs.get("duration_ms", 0),
        "success": succeeded,
        # error_type is only meaningful for failed invocations.
        "error_type": kwargs.get("error_type") if not succeeded else None,
        "turn_number": kwargs.get("turn_number", 0),
        "created_at": kwargs.get("started_at", time.time()),
    }
    try:
        insert("tool_invocations", record)
    except Exception as exc:
        logger.warning("telemetry insert failed: %s", exc)
# ── on_session_end ───────────────────────────────────────────────────────
def on_session_end(**kwargs) -> None:
    """Score the finished session and persist any detected outcome signals.

    Silently returns when no session_id is present. DB errors are logged
    and swallowed so session teardown can never fail on telemetry.
    """
    from self_evolution.db import insert, insert_many
    from self_evolution.quality_scorer import compute_score

    session_data = kwargs.get("session_data", {})
    if not session_data.get("session_id", ""):
        return
    # Compute and store the composite quality score.
    quality = compute_score(session_data)
    try:
        insert("session_scores", quality.to_db_row())
    except Exception as exc:
        logger.warning("score insert failed: %s", exc)
    # Outcome signals go in as one batched insert.
    detected = _detect_outcome_signals(session_data, kwargs)
    if detected:
        try:
            insert_many("outcome_signals", detected)
        except Exception as exc:
            logger.warning("signal insert failed: %s", exc)
def _detect_outcome_signals(session_data: dict, kwargs: dict) -> list:
"""Detect implicit outcome signals from session behavior.
Inspired by Claude Code conversation-analyzer's signal detection:
- Explicit corrections: user says "不对", "重试"
- Frustration signals: user says "为什么", "太慢"
- Completion / interruption status
- Budget exhaustion
"""
signals = []
session_id = session_data.get("session_id", "")
# Completion signal
completed = session_data.get("completed", False)
interrupted = session_data.get("interrupted", False)
partial = session_data.get("partial", False)
if completed:
signals.append({
"session_id": session_id,
"signal_type": "completed",
"signal_value": 1.0,
"metadata": "{}",
})
elif interrupted:
signals.append({
"session_id": session_id,
"signal_type": "interrupted",
"signal_value": 0.5,
"metadata": "{}",
})
elif partial:
signals.append({
"session_id": session_id,
"signal_type": "partial",
"signal_value": 0.3,
"metadata": "{}",
})
# Budget exhaustion
max_iterations = session_data.get("max_iterations", 0)
iterations = session_data.get("iterations", 0)
if max_iterations and iterations >= max_iterations:
signals.append({
"session_id": session_id,
"signal_type": "budget_exhausted",
"signal_value": 0.0,
"metadata": f'{{"iterations": {iterations}}}',
})
# User correction / frustration detection from messages
messages = session_data.get("messages", [])
for msg in messages:
if msg.get("role") != "user":
continue
content = msg.get("content", "")
if isinstance(content, list):
content = " ".join(
block.get("text", "") for block in content
if isinstance(block, dict) and block.get("type") == "text"
)
if CORRECTION_PATTERNS.search(content):
signals.append({
"session_id": session_id,
"signal_type": "correction",
"signal_value": 0.2,
"metadata": f'{{"text": {repr(content[:100])}}}',
})
break # Only one correction signal per session
if FRUSTRATION_PATTERNS.search(content):
signals.append({
"session_id": session_id,
"signal_type": "frustration",
"signal_value": 0.1,
"metadata": f'{{"text": {repr(content[:100])}}}',
})
break
return signals
# ── pre_llm_call ─────────────────────────────────────────────────────────
def on_pre_llm_call(**kwargs) -> Optional[Dict[str, Any]]:
    """Return {"system_hint": ...} with learned strategy hints, or None.

    Mirrors the Claude Code learning-output-style SessionStart hook:
    behavioral context is injected without any user action. Injector
    failures are logged and treated as "no hint".
    """
    from self_evolution.strategy_injector import inject_hints

    try:
        hint_text = inject_hints(kwargs)
    except Exception as exc:
        logger.warning("strategy injection failed: %s", exc)
        return None
    return {"system_hint": hint_text} if hint_text else None
# ── Registration ─────────────────────────────────────────────────────────
def register_all(ctx) -> None:
    """Register every lifecycle hook this plugin provides on *ctx*."""
    hook_table = (
        ("post_tool_call", on_tool_call),
        ("on_session_end", on_session_end),
        ("pre_llm_call", on_pre_llm_call),
    )
    for hook_name, handler in hook_table:
        ctx.register_hook(hook_name, handler)

View file

@ -0,0 +1,248 @@
"""
Self Evolution Plugin Model Configuration & Failover
======================================================
Handles runtime model resolution (primary / fallback / multimodal)
and thread-safe failover state management.
Extracted from reflection_engine.py for single-responsibility.
"""
from __future__ import annotations
import logging
import threading
import time
from typing import Any, Dict, Optional
logger = logging.getLogger(__name__)
# ── Model Configuration Resolution ────────────────────────────────────────
def resolve_config() -> dict:
    """Resolve model config via hermes unified runtime provider.

    Returns dict with:
        base_url, api_key, model, provider — primary text model
        fallback: {base_url, api_key, model, provider} — fallback text model
        multimodal: {base_url, api_key, model, provider} — vision model
    The "fallback"/"multimodal" entries are {} when nothing resolves.
    Returns empty dict if no provider is available.
    """
    try:
        from hermes_cli.runtime_provider import resolve_runtime_provider
        from hermes_cli.config import load_config
        runtime = resolve_runtime_provider()
        config = load_config()
        # config.yaml's default model only backs up the runtime's value.
        model_name = config.get("model", {}).get("default", "")
        result = {
            "base_url": runtime.get("base_url", ""),
            "api_key": runtime.get("api_key", ""),
            "model": runtime.get("model", model_name),
            "provider": runtime.get("provider", ""),
        }
        result["fallback"] = _resolve_fallback_config(config)
        result["multimodal"] = _resolve_multimodal_config(config)
        return result
    except Exception:
        logger.warning("Failed to resolve runtime provider", exc_info=True)
        return {}
def _resolve_fallback_config(config: Optional[dict] = None) -> dict:
    """Resolve fallback text model from config.yaml fallback_providers.

    Resolution order:
      1. First fallback_providers entry that resolves to a base_url + model.
      2. Otherwise, a localhost custom_providers entry (model auto-detected
         when not configured).
    Returns {} when nothing usable is found.
    """
    try:
        from hermes_cli.runtime_provider import resolve_runtime_provider
        if config is None:
            from hermes_cli.config import load_config
            config = load_config()
        for fb in config.get("fallback_providers", []):
            fb_provider = (fb.get("provider") or "").strip()
            fb_model = (fb.get("model") or "").strip()
            if not fb_provider:
                continue
            try:
                rt = resolve_runtime_provider(requested=fb_provider)
                base_url = rt.get("base_url", "")
                api_key = rt.get("api_key", "")
                if base_url and fb_model:
                    return {
                        "base_url": base_url,
                        "api_key": api_key,
                        "model": fb_model,
                        "provider": rt.get("provider", ""),
                    }
            except Exception:
                pass  # try the next configured fallback provider
        # Last resort: a local OpenAI-compatible server.
        for cp in config.get("custom_providers", []):
            base_url = (cp.get("base_url") or cp.get("api", "")).strip()
            if base_url and ("localhost" in base_url or "127.0.0.1" in base_url):
                model = (cp.get("model") or "").strip()
                if not model:
                    model = _detect_local_model(
                        base_url,
                        (cp.get("api_key") or "").strip(),
                    )
                # NOTE(review): gemma-4-26b is explicitly excluded here —
                # reason not documented; presumably reserved for vision use.
                if model and "gemma-4-26b" not in model.lower():
                    return {
                        "base_url": base_url.rstrip("/"),
                        "api_key": (cp.get("api_key") or "").strip(),
                        "model": model,
                        "provider": "custom",
                    }
        return {}
    except Exception:
        logger.warning("Failed to resolve fallback config", exc_info=True)
        return {}
def _resolve_multimodal_config(config: Optional[dict] = None) -> dict:
    """Resolve multimodal (vision) model config.

    Resolution order:
      1. auxiliary.vision.provider from config (unless "auto"/empty).
      2. A localhost custom_providers entry, auto-detecting a vision-capable
         model from its /models listing when none is configured.
    Returns {} when nothing usable is found.
    """
    try:
        from hermes_cli.runtime_provider import resolve_runtime_provider
        if config is None:
            from hermes_cli.config import load_config
            config = load_config()
        aux = config.get("auxiliary", {})
        vision_cfg = aux.get("vision", {})
        vision_provider = (vision_cfg.get("provider") or "").strip().lower()
        if vision_provider and vision_provider != "auto":
            try:
                rt = resolve_runtime_provider(requested=vision_provider)
                if rt.get("base_url"):
                    return {
                        "base_url": rt.get("base_url", ""),
                        "api_key": rt.get("api_key", ""),
                        # explicit vision model wins over the provider default
                        "model": vision_cfg.get("model") or rt.get("model", ""),
                        "provider": rt.get("provider", ""),
                    }
            except Exception:
                pass  # fall through to local providers
        for cp in config.get("custom_providers", []):
            base_url = (cp.get("base_url") or cp.get("api", "")).strip()
            if base_url and ("localhost" in base_url or "127.0.0.1" in base_url):
                api_key = (cp.get("api_key") or "").strip()
                key_env = (cp.get("key_env") or "").strip()
                if not api_key and key_env:
                    import os
                    api_key = os.getenv(key_env, "")
                model = (cp.get("model") or "").strip()
                if not model:
                    model = _detect_local_model(base_url, api_key)
                if model:
                    return {
                        "base_url": base_url.rstrip("/"),
                        "api_key": api_key,
                        "model": model,
                        "provider": "custom",
                    }
        return {}
    except Exception:
        logger.warning("Failed to resolve multimodal config", exc_info=True)
        return {}
# ── Failover State (thread-safe) ──────────────────────────────────────────
# Module-level singleton state shared by get_active_text_config /
# switch_to_fallback; every read/write happens under _failover_lock.
_active_model: str = "primary"  # "primary" | "fallback"
_last_health_check: float = 0.0  # epoch seconds of the last primary probe
_HEALTH_CHECK_INTERVAL: int = 1800  # 30 minutes between recovery probes
_failover_lock = threading.Lock()
def _check_primary_health(config: dict) -> bool:
"""Quick health check: send a minimal request to the primary model."""
try:
import requests
base_url = config.get("base_url", "")
api_key = config.get("api_key", "")
model = config.get("model", "")
if not base_url or not model:
return False
resp = requests.post(
f"{base_url.rstrip('/')}/chat/completions",
headers={
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json",
},
json={
"model": model,
"messages": [{"role": "user", "content": "OK"}],
"max_tokens": 2,
},
timeout=15,
)
return resp.status_code == 200
except Exception:
return False
def get_active_text_config(config: dict) -> tuple:
    """Return (active_config_dict, is_fallback) based on failover state.

    While on fallback, re-probes the primary at most once per
    _HEALTH_CHECK_INTERVAL and flips back on success. When no fallback
    config exists, the primary config is returned even in fallback state.
    """
    global _active_model, _last_health_check
    with _failover_lock:
        now = time.time()
        if _active_model == "fallback":
            # Rate-limited recovery probe of the primary endpoint.
            if now - _last_health_check >= _HEALTH_CHECK_INTERVAL:
                _last_health_check = now
                if _check_primary_health(config):
                    _active_model = "primary"
                    logger.info("Primary model recovered, switching back")
                else:
                    logger.info("Primary model still unavailable, staying on fallback")
        fallback = config.get("fallback", {})
        if _active_model == "primary":
            return config, False
        elif fallback:
            return fallback, True
        else:
            return config, False
def switch_to_fallback():
    """Mark primary as down and switch to fallback.

    Also stamps _last_health_check so get_active_text_config waits a full
    _HEALTH_CHECK_INTERVAL before probing the primary again.
    """
    global _active_model, _last_health_check
    with _failover_lock:
        _active_model = "fallback"
        _last_health_check = time.time()
        logger.warning("Primary model failed, switched to fallback")
def _detect_local_model(base_url: str, api_key: str = "") -> str:
"""Auto-detect a multimodal model from a local server."""
try:
import requests
headers = {}
if api_key:
headers["Authorization"] = f"Bearer {api_key}"
resp = requests.get(
f"{base_url.rstrip('/')}/models",
headers=headers, timeout=5,
)
if resp.ok:
models = resp.json().get("data", [])
multimodal_hints = ["gemma-4", "qwen2-vl", "qwen-vl", "llava", "pixtral", "vision"]
for m in models:
mid = m.get("id", "").lower()
for hint in multimodal_hints:
if hint in mid:
return m["id"]
except Exception:
pass
return ""

363
self_evolution/models.py Normal file
View file

@ -0,0 +1,363 @@
"""
Self Evolution Plugin Data Models
=====================================
"""
from __future__ import annotations
from dataclasses import dataclass, field, asdict
from typing import Any, Dict, List, Optional
import json
import time
def _now() -> float:
    """Epoch seconds; default factory for the created_at fields below."""
    return time.time()


def _ts() -> str:
    """Local-time timestamp string, e.g. "2026-04-25 07:59:18"."""
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
# ── Quality Scoring ──────────────────────────────────────────────────────
@dataclass
class QualityScore:
    """Per-session composite quality score and its components.

    Component values are presumably normalized to [0, 1] — confirm with
    quality_scorer.compute_score.
    """

    session_id: str
    composite: float = 0.0  # weighted overall score
    completion_rate: float = 0.0
    efficiency_score: float = 0.0
    cost_efficiency: float = 0.0
    satisfaction_proxy: float = 0.0
    task_category: str = ""
    model: str = ""
    created_at: float = field(default_factory=_now)

    def to_db_row(self) -> dict:
        """Map to session_scores columns (note: composite -> composite_score)."""
        return {
            "session_id": self.session_id,
            "composite_score": self.composite,
            "completion_rate": self.completion_rate,
            "efficiency_score": self.efficiency_score,
            "cost_efficiency": self.cost_efficiency,
            "satisfaction_proxy": self.satisfaction_proxy,
            "task_category": self.task_category,
            "model": self.model,
            "created_at": self.created_at,
        }
# ── Error Analysis ───────────────────────────────────────────────────────
@dataclass
class ToolFailure:
    """One (tool, error_type) failure bucket aggregated across sessions."""

    tool_name: str
    error_type: str
    count: int  # total occurrences in the analyzed period
    sessions_affected: List[str] = field(default_factory=list)
    example_session: str = ""  # one representative session id
@dataclass
class RetryPattern:
    """Repeated invocations of the same tool within one session."""

    session_id: str
    tool_name: str
    attempt_count: int
    final_outcome: str  # "success" | "failure" | "abandoned"
@dataclass
class ErrorAnalysis:
    """Aggregated error findings for one reflection period."""

    tool_failures: List[ToolFailure] = field(default_factory=list)
    retry_patterns: List[RetryPattern] = field(default_factory=list)
    incomplete_sessions: List[str] = field(default_factory=list)
    user_corrections: int = 0
    correction_examples: List[str] = field(default_factory=list)
    api_error_count: int = 0
    api_error_types: Dict[str, int] = field(default_factory=dict)

    def summary(self) -> str:
        """Human-readable (Chinese) multi-line digest; "" when empty."""
        parts: List[str] = []
        if self.tool_failures:
            parts.append(f"工具失败: {len(self.tool_failures)} 种工具出错")
            parts.extend(
                f" - {tf.tool_name}: {tf.count}次 ({tf.error_type})"
                for tf in self.tool_failures[:5]
            )
        if self.retry_patterns:
            retries = len(self.retry_patterns)
            parts.append(f"重复重试: {retries}")
        if self.incomplete_sessions:
            parts.append(f"未完成session: {len(self.incomplete_sessions)}")
        if self.user_corrections:
            parts.append(f"用户纠正: {self.user_corrections}")
        if self.api_error_count:
            parts.append(f"API错误: {self.api_error_count}")
        return "\n".join(parts)
# ── Time Waste Analysis ──────────────────────────────────────────────────
@dataclass
class ToolDuration:
    """Aggregate timing stats for one tool over the analyzed period."""

    tool_name: str
    total_duration_ms: int
    call_count: int
    avg_duration_ms: float  # total_duration_ms / call_count, precomputed
@dataclass
class RepeatedOperation:
    """A detected duplicate operation pattern (potential wasted work)."""

    description: str
    count: int
    sessions: List[str] = field(default_factory=list)
    wasted_ms: int = 0  # estimated time spent on the repeats
@dataclass
class WasteAnalysis:
    """Aggregated time-waste findings for one reflection period."""

    slowest_tools: List[ToolDuration] = field(default_factory=list)
    repeated_operations: List[RepeatedOperation] = field(default_factory=list)
    inefficient_sessions: List[str] = field(default_factory=list)
    shortcut_opportunities: List[str] = field(default_factory=list)

    def summary(self) -> str:
        """Human-readable (Chinese) multi-line digest; "" when empty."""
        parts: List[str] = []
        if self.slowest_tools:
            parts.append("耗时最长的工具:")
            parts.extend(
                f" - {td.tool_name}: 平均{td.avg_duration_ms:.0f}ms ({td.call_count}次)"
                for td in self.slowest_tools[:5]
            )
        if self.repeated_operations:
            parts.append(f"重复操作: {len(self.repeated_operations)}")
            parts.extend(
                f" - {ro.description}: {ro.count}"
                for ro in self.repeated_operations[:5]
            )
        if self.inefficient_sessions:
            parts.append(f"低效session: {len(self.inefficient_sessions)}")
        if self.shortcut_opportunities:
            parts.append(f"可优化路径: {len(self.shortcut_opportunities)}")
        return "\n".join(parts)
# ── Code Change Analysis ──────────────────────────────────────────────────
@dataclass
class CommitInfo:
    """One parsed git commit from the batched `git log` output."""

    hash_short: str  # abbreviated hash (%h)
    subject: str
    body: str = ""  # commit body, capped at 500 chars by the parser
    author: str = ""
    timestamp: float = 0.0  # author epoch (%at)
    files_changed: int = 0
    insertions: int = 0
    deletions: int = 0
    file_list: List[str] = field(default_factory=list)  # capped at 20 paths
@dataclass
class CodeChangeAnalysis:
    """Aggregated view of the period's git activity."""

    commits: List[CommitInfo] = field(default_factory=list)
    total_commits: int = 0
    total_insertions: int = 0
    total_deletions: int = 0
    total_files_changed: int = 0
    authors: List[str] = field(default_factory=list)
    change_categories: Dict[str, int] = field(default_factory=dict)
    areas_touched: List[str] = field(default_factory=list)

    def summary(self) -> str:
        """Human-readable multi-line digest of the code changes."""
        if not self.commits:
            return "代码更新: 无新提交"
        out = [
            f"代码更新: {self.total_commits} commits, "
            f"+{self.total_insertions}/-{self.total_deletions} lines, "
            f"{self.total_files_changed} files changed",
        ]
        if self.change_categories:
            cats = ", ".join(f"{k}: {v}" for k, v in self.change_categories.items())
            out.append(f"提交类型分布: {cats}")
        if self.areas_touched:
            out.append(f"涉及模块: {', '.join(self.areas_touched)}")
        out.append("主要变更:")
        out.extend(
            f" - {c.subject} ({c.hash_short}, +{c.insertions}/-{c.deletions})"
            for c in self.commits[:8]
        )
        return "\n".join(out)
# ── Reflection Report ────────────────────────────────────────────────────
@dataclass
class ReflectionReport:
    """Output of one reflection pass over a [period_start, period_end) window."""

    period_start: float
    period_end: float
    sessions_analyzed: int = 0
    avg_score: float = 0.0
    error_summary: str = ""
    waste_summary: str = ""
    worst_patterns: List[str] = field(default_factory=list)
    best_patterns: List[str] = field(default_factory=list)
    tool_insights: Dict[str, Dict[str, Any]] = field(default_factory=dict)
    recommendations: List[str] = field(default_factory=list)
    code_change_summary: str = ""
    model_used: str = ""  # which LLM produced the reflection
    created_at: float = field(default_factory=_now)

    def to_db_row(self) -> dict:
        """Flatten for DB insert; list/dict fields are JSON-encoded."""
        return {
            "period_start": self.period_start,
            "period_end": self.period_end,
            "sessions_analyzed": self.sessions_analyzed,
            "avg_score": self.avg_score,
            "error_summary": self.error_summary,
            "waste_summary": self.waste_summary,
            "worst_patterns": json.dumps(self.worst_patterns, ensure_ascii=False),
            "best_patterns": json.dumps(self.best_patterns, ensure_ascii=False),
            "tool_insights": json.dumps(self.tool_insights, ensure_ascii=False),
            "recommendations": json.dumps(self.recommendations, ensure_ascii=False),
            "code_change_summary": self.code_change_summary,
            "model_used": self.model_used,
            "created_at": self.created_at,
        }
# ── Evolution Proposal ───────────────────────────────────────────────────
@dataclass
class Proposal:
    """A single evolution proposal awaiting user approval via Feishu.

    Status moves from "pending_approval" to "approved" / "rejected"
    (see the notifier's _approve / _reject handlers).
    """

    id: str
    proposal_type: str  # skill | strategy | memory | tool_preference | code_improvement
    title: str
    description: str
    expected_impact: str = ""
    risk_assessment: str = "low"
    rollback_plan: str = ""
    status: str = "pending_approval"
    report_id: Optional[int] = None  # originating ReflectionReport row, if any
    user_feedback: str = ""  # modification/rejection text from the user
    created_at: float = field(default_factory=_now)
    resolved_at: Optional[float] = None

    def to_db_row(self) -> dict:
        """Map 1:1 onto evolution_proposals columns."""
        return {
            "id": self.id,
            "report_id": self.report_id,
            "proposal_type": self.proposal_type,
            "title": self.title,
            "description": self.description,
            "expected_impact": self.expected_impact,
            "risk_assessment": self.risk_assessment,
            "rollback_plan": self.rollback_plan,
            "status": self.status,
            "user_feedback": self.user_feedback,
            "created_at": self.created_at,
            "resolved_at": self.resolved_at,
        }
# ── Improvement Unit (A/B Test Tracking) ─────────────────────────────────
@dataclass
class ImprovementUnit:
    """A/B-test tracking record for one applied improvement.

    Compares current_score against baseline_score over sampled sessions
    and decides promotion or automatic revert via the properties below.
    """

    id: str
    proposal_id: str
    change_type: str
    version: int = 0
    baseline_score: float = 0.0  # pre-change quality score
    current_score: float = 0.0  # running post-change quality score
    sessions_sampled: int = 0
    min_sessions: int = 10  # sample size required before promotion
    min_improvement: float = 0.05  # score gain required to promote
    max_regression: float = 0.10  # score drop that triggers a revert
    status: str = "active"  # active | promoted | reverted
    created_at: float = field(default_factory=_now)
    resolved_at: Optional[float] = None

    @property
    def should_revert(self) -> bool:
        """True once ≥3 sessions show a regression beyond max_regression."""
        return (
            self.sessions_sampled >= 3
            and (self.baseline_score - self.current_score) > self.max_regression
        )

    @property
    def should_promote(self) -> bool:
        """True once min_sessions are sampled with ≥ min_improvement gain."""
        return (
            self.sessions_sampled >= self.min_sessions
            and (self.current_score - self.baseline_score) >= self.min_improvement
        )

    def to_db_row(self) -> dict:
        """Map 1:1 onto improvement-unit table columns."""
        return {
            "id": self.id,
            "proposal_id": self.proposal_id,
            "change_type": self.change_type,
            "version": self.version,
            "baseline_score": self.baseline_score,
            "current_score": self.current_score,
            "sessions_sampled": self.sessions_sampled,
            "min_sessions": self.min_sessions,
            "min_improvement": self.min_improvement,
            "max_regression": self.max_regression,
            "status": self.status,
            "created_at": self.created_at,
            "resolved_at": self.resolved_at,
        }
# ── Strategy Rule ────────────────────────────────────────────────────────
@dataclass
class StrategyCondition:
    """One matching condition of a StrategyRule."""

    # NOTE: attribute name "field" shadows dataclasses.field inside this
    # class body only; it is part of the persisted schema, so keep it.
    field: str
    operator: str  # regex_match | contains | equals | not_contains
    pattern: str
@dataclass
class StrategyRule:
    """A learned (or manual) behavioral rule injected as an LLM hint.

    Serialized to/from strategies.json via to_dict / from_dict.
    """

    id: str
    name: str
    strategy_type: str  # hint | avoid | prefer
    description: str
    conditions: List[StrategyCondition] = field(default_factory=list)
    hint_text: str = ""  # text injected when the conditions match
    severity: str = "medium"  # high | medium | low
    enabled: bool = True
    version: int = 1
    source: str = "learned"  # learned | manual | default
    created_at: float = field(default_factory=_now)

    def to_dict(self) -> dict:
        """JSON-serializable form; conditions flattened to plain dicts."""
        return {
            "id": self.id,
            "name": self.name,
            "strategy_type": self.strategy_type,
            "description": self.description,
            "conditions": [
                {"field": c.field, "operator": c.operator, "pattern": c.pattern}
                for c in self.conditions
            ],
            "hint_text": self.hint_text,
            "severity": self.severity,
            "enabled": self.enabled,
            "version": self.version,
            "source": self.source,
            "created_at": self.created_at,
        }

    @classmethod
    def from_dict(cls, d: dict) -> StrategyRule:
        """Inverse of to_dict; missing optional keys fall back to defaults.

        Raises KeyError when id/name or a condition's field/operator/pattern
        is absent.
        """
        conditions = [
            StrategyCondition(field=c["field"], operator=c["operator"], pattern=c["pattern"])
            for c in d.get("conditions", [])
        ]
        return cls(
            id=d["id"],
            name=d["name"],
            strategy_type=d.get("strategy_type", "hint"),
            description=d.get("description", ""),
            conditions=conditions,
            hint_text=d.get("hint_text", ""),
            severity=d.get("severity", "medium"),
            enabled=d.get("enabled", True),
            version=d.get("version", 1),
            source=d.get("source", "learned"),
            created_at=d.get("created_at", _now()),
        )

17
self_evolution/paths.py Normal file
View file

@ -0,0 +1,17 @@
"""
Self Evolution Plugin Centralized Path Definitions
=====================================================
Single source of truth for all filesystem paths used by the plugin.
"""
from pathlib import Path
HERMES_HOME = Path.home() / ".hermes"
DATA_DIR = HERMES_HOME / "self_evolution"
DB_PATH = DATA_DIR / "evolution.db"
STRATEGIES_FILE = DATA_DIR / "strategies.json"
ARCHIVE_DIR = DATA_DIR / "archive"
SKILLS_DIR = HERMES_HOME / "skills" / "learned"
MEMORIES_DIR = HERMES_HOME / "memories"
CRON_DIR = HERMES_HOME / "cron"

View file

@ -0,0 +1,7 @@
# Plugin manifest for the self_evolution plugin.
name: self_evolution
version: "1.0.0"
description: "Agent self-optimization and continuous evolution — dream consolidation at 1:00, Feishu approval at 19:00"
# Hook points this plugin registers with the host agent runtime.
provides_hooks:
- post_tool_call
- on_session_end
- pre_llm_call

View file

@ -0,0 +1,7 @@
## 概况
- 时段: {period_range}
- Session 数: {sessions_count}, 平均质量: {avg_score}
- 工具调用: {total_invocations} 次, 成功率 {success_rate}%
## 数据
{data_json}

View file

@ -0,0 +1,177 @@
"""
Self Evolution Plugin Quality Scorer
=======================================
Computes a composite quality score for each session:
session_quality = 0.4 * completion_rate
+ 0.2 * efficiency_score
+ 0.15 * cost_efficiency
+ 0.25 * satisfaction_proxy
Zero API cost pure computation from already-collected session data.
"""
from __future__ import annotations
import logging
from typing import Any, Dict
from self_evolution.models import QualityScore
logger = logging.getLogger(__name__)
# ── Weights ──────────────────────────────────────────────────────────────
W_COMPLETION = 0.40
W_EFFICIENCY = 0.20
W_COST = 0.15
W_SATISFACTION = 0.25
# Ideal iteration counts by task complexity
IDEAL_ITERATIONS = {
"simple": 3,
"medium": 8,
"complex": 15,
}
DEFAULT_IDEAL_ITERATIONS = 8
def compute_score(session_data: dict) -> QualityScore:
    """Compute a composite quality score from session data.

    Args:
        session_data: dict with keys like:
            - completed, interrupted, partial
            - iterations, max_iterations
            - tool_call_count, message_count
            - input_tokens, output_tokens, estimated_cost_usd
            - duration_seconds
            - model, platform
            - messages (list)

    Returns:
        QualityScore with individual and composite scores.
    """
    # Evaluate each component once, then blend with the module-level weights.
    completion_part = _completion_rate(session_data)
    efficiency_part = _efficiency_score(session_data)
    cost_part = _cost_efficiency(session_data)
    satisfaction_part = _satisfaction_proxy(session_data)
    weighted_total = (
        W_COMPLETION * completion_part
        + W_EFFICIENCY * efficiency_part
        + W_COST * cost_part
        + W_SATISFACTION * satisfaction_part
    )
    return QualityScore(
        session_id=session_data.get("session_id", ""),
        composite=round(weighted_total, 3),
        completion_rate=round(completion_part, 3),
        efficiency_score=round(efficiency_part, 3),
        cost_efficiency=round(cost_part, 3),
        satisfaction_proxy=round(satisfaction_part, 3),
        task_category=_detect_task_category(session_data),
        model=session_data.get("model", ""),
    )
# ── Individual Score Components ──────────────────────────────────────────
def _completion_rate(session_data: dict) -> float:
"""1.0 if completed, 0.5 if interrupted, 0.0 if failed."""
if session_data.get("completed"):
return 1.0
if session_data.get("interrupted"):
return 0.5
if session_data.get("partial"):
return 0.3
return 0.0
def _efficiency_score(session_data: dict) -> float:
    """Ratio of ideal to actual iterations, clamped to [0, 1].

    Falls back to tool_call_count when no iteration count was recorded.
    """
    actual = session_data.get("iterations", 0) or session_data.get("tool_call_count", 0)
    if actual <= 0:
        # Nothing recorded at all counts as maximally efficient.
        return 1.0
    ideal = IDEAL_ITERATIONS.get(_detect_task_category(session_data), DEFAULT_IDEAL_ITERATIONS)
    return min(1.0, ideal / max(actual, 1))
def _cost_efficiency(session_data: dict) -> float:
"""Baseline cost / actual cost, capped at 1.0.
Uses message count as a proxy for expected work.
"""
messages = session_data.get("message_count", 1) or 1
tool_calls = session_data.get("tool_call_count", 0) or 0
iterations = session_data.get("iterations", 0) or 0
# Expected: roughly 2 tool calls per user message
expected_tool_calls = messages * 2
if expected_tool_calls <= 0:
return 1.0
return min(1.0, expected_tool_calls / max(tool_calls, 1))
def _satisfaction_proxy(session_data: dict) -> float:
"""Estimate satisfaction from behavioral signals.
Signals:
- Single-turn session (user got what they needed) = high
- Multi-turn but completed = medium-high
- User corrections detected = lower
- Budget exhausted = low
"""
messages = session_data.get("message_count", 1) or 1
completed = session_data.get("completed", False)
max_iterations = session_data.get("max_iterations", 0)
iterations = session_data.get("iterations", 0)
score = 0.7 # baseline
# Single-turn completion is a strong positive signal
if messages <= 2 and completed:
score = 0.9
elif completed:
score = 0.75
elif messages > 10:
score = 0.5
# Budget exhaustion is a negative signal
if max_iterations and iterations >= max_iterations:
score -= 0.2
return max(0.0, min(1.0, score))
# ── Task Category Detection ──────────────────────────────────────────────
def _detect_task_category(session_data: dict) -> str:
"""Detect task category from tool usage patterns."""
tool_names = session_data.get("tool_names", [])
if isinstance(tool_names, str):
tool_names = tool_names.split(",")
tool_set = set(t.lower() for t in tool_names) if tool_names else set()
coding_tools = {"terminal", "bash", "write", "edit", "file_write", "file_edit"}
web_tools = {"web_search", "browser", "browser_navigate", "scrape", "fetch"}
file_tools = {"read", "file_read", "grep", "glob", "find"}
if tool_set & coding_tools:
return "coding"
if tool_set & web_tools:
return "web_research"
if tool_set & file_tools:
return "file_analysis"
return "general"

View file

@ -0,0 +1,751 @@
"""
Self Evolution Plugin Dream Engine (Reflection Engine)
=========================================================
Runs nightly at 1:00 to analyze the previous day's sessions.
Design reference: Claude Code plugins/hookify/agents/conversation-analyzer.md
- Analyzes conversations in reverse chronological order
- Detects: corrections, frustrations, repeated issues, reversions
- Extracts tool usage patterns, converts to actionable rules
- Categorizes by severity
We extend this pattern with:
- Full automated analysis (not just on user request)
- Error analysis (tool failures, retries, API errors)
- Time waste analysis (slow tools, repeated ops, inefficient sessions)
"""
from __future__ import annotations
import json
import logging
import os
import re
import time
from pathlib import Path
from typing import Any, Dict, List, Optional
from self_evolution import db
from self_evolution.model_config import resolve_config, get_active_text_config, switch_to_fallback
from self_evolution.git_analyzer import analyze_code_changes
from self_evolution.models import (
ErrorAnalysis, ToolFailure, RetryPattern,
WasteAnalysis, ToolDuration, RepeatedOperation,
CodeChangeAnalysis, CommitInfo,
ReflectionReport,
)
logger = logging.getLogger(__name__)
# ── Backward-compatible aliases ────────────────────────────────────────────
# These are used by cron_jobs.py and other callers.
# They re-expose the model_config helpers under their previous
# underscore-prefixed names so existing imports keep working.
_resolve_runtime_config = resolve_config
_get_active_text_config = get_active_text_config
_switch_to_fallback = switch_to_fallback
class DreamEngine:
    """Nightly dream consolidation engine.
    Analyzes the previous day's sessions to find:
    1. Error patterns (tool failures, retries, incomplete tasks)
    2. Time waste patterns (slow tools, repeated operations, inefficient flows)
    3. Success patterns (what worked well)
    4. Generates actionable evolution proposals
    """
    def __init__(self, config: Optional[dict] = None):
        # Runtime model configuration; resolved via model_config when the
        # caller does not supply one.
        self.config = config or _resolve_runtime_config()
        self._model_client = None  # reserved — not referenced by the methods below
        self._current_prompt = ""  # last prompt built; read by _call_chat_completions
    def run(self, hours: int = 24, max_runtime_seconds: int = 0) -> Optional[ReflectionReport]:
        """Main dream consolidation flow.
        Args:
            hours: Analyze data from the last N hours.
            max_runtime_seconds: Hard timeout in seconds. 0 = no limit.
                If exceeded, stops at the next step boundary and returns None.
        Returns:
            The persisted ReflectionReport on success; None when there is no
            data, the deadline is exceeded, the model yields nothing, or any
            step raises (all exceptions are caught and logged here).
        """
        logger.info("Dream engine starting — analyzing last %d hours", hours)
        # A deadline of 0 means "no limit"; it is only checked between steps.
        deadline = time.time() + max_runtime_seconds if max_runtime_seconds > 0 else 0
        now = time.time()
        cutoff = now - (hours * 3600)
        try:
            # 1. Load session data
            scores = db.fetch_all(
                "session_scores",
                where="created_at >= ?",
                params=(cutoff,),
                order_by="created_at DESC",
            )
            tool_invocations = db.fetch_all(
                "tool_invocations",
                where="created_at >= ?",
                params=(cutoff,),
                order_by="created_at DESC",
            )
            signals = db.fetch_all(
                "outcome_signals",
                where="created_at >= ?",
                params=(cutoff,),
            )
            if not scores:
                logger.info("No sessions to analyze")
                return None
            # 2. Error analysis
            if deadline and time.time() > deadline:
                logger.warning("Dream engine timed out before error analysis")
                return None
            error_analysis = self._analyze_errors(scores, tool_invocations, signals)
            logger.info("Error analysis: %s", error_analysis.summary())
            # 3. Time waste analysis
            if deadline and time.time() > deadline:
                logger.warning("Dream engine timed out before waste analysis")
                return None
            waste_analysis = self._analyze_time_waste(scores, tool_invocations)
            logger.info("Waste analysis: %s", waste_analysis.summary())
            # 3.5. Code change analysis
            if deadline and time.time() > deadline:
                logger.warning("Dream engine timed out before code analysis")
                return None
            code_analysis = analyze_code_changes(hours=hours)
            logger.info("Code change analysis: %d commits found", code_analysis.total_commits)
            # 4. Compute average score
            avg_score = (
                sum(s.get("composite_score", 0) for s in scores) / len(scores)
                if scores else 0
            )
            # 5. Build reflection prompt
            if deadline and time.time() > deadline:
                logger.warning("Dream engine timed out before model call")
                return None
            prompt = self._build_reflection_prompt(
                scores, tool_invocations, signals,
                error_analysis, waste_analysis, avg_score,
                code_analysis=code_analysis,
            )
            # 6. Call model for deep reflection
            reflection_text = self._call_model(prompt)
            if not reflection_text:
                logger.warning("Model returned empty reflection")
                return None
            # 7. Parse reflection report
            report = self._parse_reflection(
                reflection_text=reflection_text,
                period_start=cutoff,
                period_end=now,
                sessions_analyzed=len(scores),
                avg_score=avg_score,
                error_analysis=error_analysis,
                waste_analysis=waste_analysis,
                code_analysis=code_analysis,
            )
            # 8. Store report
            report_id = db.insert("reflection_reports", report.to_db_row())
            logger.info("Reflection report saved: id=%d, avg_score=%.3f", report_id, avg_score)
            # 9. Generate evolution proposals
            # Local import — presumably avoids a module-level import cycle; confirm.
            from self_evolution.evolution_proposer import generate_proposals
            proposals = generate_proposals(report, report_id)
            for p in proposals:
                db.insert("evolution_proposals", p.to_db_row())
            logger.info("Generated %d evolution proposals", len(proposals))
            # 10. Compress existing strategies (best-effort; failure is non-fatal)
            try:
                from self_evolution.strategy_compressor import compress_strategies
                from self_evolution.strategy_store import StrategyStore
                store = StrategyStore()
                data = store.load()
                rules = data.get("rules", [])
                compressed = compress_strategies(rules)
                if len(compressed) < len(rules):
                    data["rules"] = compressed
                    store.save(data)
                    # NOTE(review): "%d%d" renders the two counts fused together;
                    # a separator (e.g. "→") may have been lost — confirm.
                    logger.info("Strategies compressed: %d%d", len(rules), len(compressed))
            except Exception as exc:
                logger.warning("Strategy compression failed: %s", exc)
            # 11. Cleanup old data
            db.cleanup(days=30)
            return report
        except Exception as exc:
            logger.exception("Dream engine failed: %s", exc)
            return None
    # ── Error Analysis ────────────────────────────────────────────────────
    def _analyze_errors(
        self,
        scores: List[dict],
        invocations: List[dict],
        signals: List[dict],
    ) -> ErrorAnalysis:
        """Analyze all errors in the period.
        Inspired by Claude Code conversation-analyzer's signal detection.
        """
        # Tool failures, keyed by "tool:error_type" so the same failure mode
        # aggregates across sessions.
        failures = {}
        for inv in invocations:
            if not inv.get("success", True):
                tool = inv.get("tool_name", "unknown")
                error_type = inv.get("error_type", "unknown")
                key = f"{tool}:{error_type}"
                if key not in failures:
                    failures[key] = ToolFailure(
                        tool_name=tool,
                        error_type=error_type,
                        count=0,
                        sessions_affected=[],
                        example_session=inv.get("session_id", ""),
                    )
                failures[key].count += 1
                sid = inv.get("session_id", "")
                if sid and sid not in failures[key].sessions_affected:
                    failures[key].sessions_affected.append(sid)
        # Retry patterns (same tool called > 2 times in same session)
        retries = self._detect_retry_patterns(invocations)
        # Incomplete sessions
        incomplete = [
            s.get("session_id", "") for s in scores
            if s.get("completion_rate", 1.0) < 0.5
        ]
        # User corrections from signals
        corrections = [s for s in signals if s.get("signal_type") == "correction"]
        frustration = [s for s in signals if s.get("signal_type") == "frustration"]  # NOTE(review): collected but unused below
        api_errors = [s for s in signals if s.get("signal_type") == "api_error"]
        # API error type distribution
        api_error_types: Dict[str, int] = {}
        for s in api_errors:
            # Malformed metadata JSON raises here; run() catches it one level up.
            meta = json.loads(s.get("metadata", "{}"))
            etype = meta.get("error_type", "unknown")
            api_error_types[etype] = api_error_types.get(etype, 0) + 1
        return ErrorAnalysis(
            tool_failures=sorted(failures.values(), key=lambda x: x.count, reverse=True),
            retry_patterns=retries,
            incomplete_sessions=incomplete,
            user_corrections=len(corrections),
            correction_examples=[s.get("metadata", "") for s in corrections[:3]],
            api_error_count=len(api_errors),
            api_error_types=api_error_types,
        )
    def _detect_retry_patterns(self, invocations: List[dict]) -> List[RetryPattern]:
        """Detect tools called > 2 times in same session."""
        # session_id -> {tool_name -> call count}
        session_tools: Dict[str, Dict[str, int]] = {}
        for inv in invocations:
            sid = inv.get("session_id", "")
            tool = inv.get("tool_name", "")
            if sid not in session_tools:
                session_tools[sid] = {}
            session_tools[sid][tool] = session_tools[sid].get(tool, 0) + 1
        patterns = []
        for sid, tools in session_tools.items():
            for tool, count in tools.items():
                if count > 2:
                    patterns.append(RetryPattern(
                        session_id=sid,
                        tool_name=tool,
                        attempt_count=count,
                        final_outcome="unknown",  # outcome is not reconstructed here
                    ))
        # Worst offenders first, capped at 20 entries.
        return sorted(patterns, key=lambda x: x.attempt_count, reverse=True)[:20]
    # ── Time Waste Analysis ───────────────────────────────────────────────
    def _analyze_time_waste(
        self,
        scores: List[dict],
        invocations: List[dict],
    ) -> WasteAnalysis:
        """Analyze time waste patterns."""
        # Slowest tools (per-tool duration samples; zero/missing durations skipped)
        tool_durations: Dict[str, List[int]] = {}
        for inv in invocations:
            tool = inv.get("tool_name", "")
            duration = inv.get("duration_ms", 0)
            if not duration:
                continue
            if tool not in tool_durations:
                tool_durations[tool] = []
            tool_durations[tool].append(duration)
        slowest = [
            ToolDuration(
                tool_name=tool,
                total_duration_ms=sum(durs),
                call_count=len(durs),
                avg_duration_ms=sum(durs) / len(durs),
            )
            for tool, durs in tool_durations.items()
        ]
        slowest.sort(key=lambda x: x.avg_duration_ms, reverse=True)
        # Repeated operations (same tool + same session > 3 times)
        session_tool_calls: Dict[str, Dict[str, int]] = {}
        for inv in invocations:
            sid = inv.get("session_id", "")
            tool = inv.get("tool_name", "")
            if sid not in session_tool_calls:
                session_tool_calls[sid] = {}
            session_tool_calls[sid][tool] = session_tool_calls[sid].get(tool, 0) + 1
        repeated = []
        for sid, tools in session_tool_calls.items():
            for tool, count in tools.items():
                if count > 3:
                    repeated.append(RepeatedOperation(
                        description=f"{tool} called {count} times",
                        count=count,
                        sessions=[sid],
                        # Rough estimate: first recorded duration × calls beyond
                        # the first two; 0 when no duration was recorded.
                        wasted_ms=tool_durations.get(tool, [0])[0] * (count - 2) if tool in tool_durations else 0,
                    ))
        # Inefficient sessions (low efficiency score)
        inefficient = [
            s.get("session_id", "") for s in scores
            if s.get("efficiency_score", 1.0) < 0.3
        ]
        return WasteAnalysis(
            slowest_tools=slowest[:10],
            repeated_operations=sorted(repeated, key=lambda x: x.count, reverse=True)[:10],
            inefficient_sessions=inefficient,
            shortcut_opportunities=[],  # not computed here
        )
    # ── Reflection Prompt ─────────────────────────────────────────────────
    def _build_reflection_prompt(
        self,
        scores: List[dict],
        invocations: List[dict],
        signals: List[dict],
        errors: ErrorAnalysis,
        waste: WasteAnalysis,
        avg_score: float,
        code_analysis: Optional[CodeChangeAnalysis] = None,
    ) -> str:
        """Build the reflection prompt as structured JSON data.
        All analysis results are serialized as JSON so the model receives
        lossless data instead of pre-summarized text.
        """
        # Load user prompt template (short: just overview + data placeholder)
        template_path = Path(__file__).parent / "prompts" / "reflection.md"
        if template_path.exists():
            template = template_path.read_text(encoding="utf-8")
        else:
            template = _DEFAULT_REFLECTION_PROMPT
        # Compute statistics; a missing "success" flag counts as a success.
        total_invocations = len(invocations)
        success_rate = (
            sum(1 for i in invocations if i.get("success", True)) / total_invocations * 100
            if total_invocations else 100
        )
        # Period range
        if scores:
            ts_min = min(s.get("created_at", 0) for s in scores)
            ts_max = max(s.get("created_at", 0) for s in scores)
            period_range = (
                f"{time.strftime('%m-%d %H:%M', time.localtime(ts_min))} ~ "
                f"{time.strftime('%m-%d %H:%M', time.localtime(ts_max))}"
            )
        else:
            period_range = "N/A"
        # Build structured data JSON — compact format to save tokens
        data = {}
        # 1. Sessions — compact: [score, completion, efficiency, cost, satisfaction, category]
        data["sessions"] = [
            [
                round(s.get("composite_score", 0), 2),
                round(s.get("completion_rate", 0), 2),
                round(s.get("efficiency_score", 0), 2),
                round(s.get("cost_efficiency", 0), 2),
                round(s.get("satisfaction_proxy", 0), 2),
                s.get("task_category", ""),
            ]
            for s in scores
        ]
        # 2. Tool usage — compact: {tool: [calls, failures, avg_ms]}
        tool_stats: Dict[str, List[int]] = {}
        for inv in invocations:
            tool = inv.get("tool_name", "")
            if tool not in tool_stats:
                tool_stats[tool] = [0, 0, 0]  # calls, failures, total_ms
            tool_stats[tool][0] += 1
            if not inv.get("success", True):
                tool_stats[tool][1] += 1
            tool_stats[tool][2] += inv.get("duration_ms", 0) or 0
        # Sorted by total time spent (index 2), busiest first.
        data["tools"] = {
            t: [v[0], v[1], round(v[2] / max(v[0], 1))]
            for t, v in sorted(tool_stats.items(), key=lambda x: x[1][2], reverse=True)
        }
        # 3. Signals — compact: {type: count}
        signal_types = {}
        for s in signals:
            stype = s.get("signal_type", "unknown")
            signal_types[stype] = signal_types.get(stype, 0) + 1
        data["signals"] = signal_types
        # 4. Errors — only non-empty fields
        err_data = {}
        if errors.tool_failures:
            err_data["tool_failures"] = [
                f"{tf.tool_name}:{tf.error_type}x{tf.count}"
                for tf in errors.tool_failures
            ]
        if errors.retry_patterns:
            err_data["retries"] = [
                f"{rp.tool_name}x{rp.attempt_count}"
                for rp in errors.retry_patterns[:5]
            ]
        if errors.incomplete_sessions:
            err_data["incomplete"] = len(errors.incomplete_sessions)
        if errors.user_corrections:
            err_data["corrections"] = errors.user_corrections
        if errors.correction_examples:
            err_data["correction_examples"] = errors.correction_examples[:2]
        if errors.api_error_count:
            err_data["api_errors"] = errors.api_error_count
        if err_data:
            data["errors"] = err_data
        # 5. Waste — only non-empty
        waste_data = {}
        if waste.slowest_tools:
            waste_data["slowest"] = [
                f"{td.tool_name} {round(td.avg_duration_ms)}ms/{td.call_count}calls"
                for td in waste.slowest_tools[:5]
            ]
        if waste.repeated_operations:
            waste_data["repeated"] = [
                f"{ro.description} x{ro.count}"
                for ro in waste.repeated_operations[:3]
            ]
        if waste.inefficient_sessions:
            waste_data["inefficient"] = len(waste.inefficient_sessions)
        if waste_data:
            data["waste"] = waste_data
        # 6. Code changes — flat compact format
        if code_analysis and code_analysis.commits:
            cc = code_analysis
            commits_data = []
            for c in cc.commits[:10]:
                entry = f"{c.hash_short} {c.subject} +{c.insertions}/-{c.deletions}"
                if c.file_list:
                    entry += f" [{','.join(c.file_list[:5])}]"
                if c.body:
                    entry += f" | {c.body[:150]}"
                commits_data.append(entry)
            data["code_changes"] = {
                "stats": f"{cc.total_commits} commits +{cc.total_insertions}/-{cc.total_deletions} lines {cc.total_files_changed} files",
                "categories": cc.change_categories,
                "areas": cc.areas_touched,
                "commits": commits_data,
            }
        data_json = json.dumps(data, ensure_ascii=False, indent=2)
        # Fill template (plain substring replacement, not str.format)
        prompt = template.replace("{period_range}", period_range)
        prompt = prompt.replace("{sessions_count}", str(len(scores)))
        prompt = prompt.replace("{avg_score}", f"{avg_score:.3f}")
        prompt = prompt.replace("{total_invocations}", str(total_invocations))
        prompt = prompt.replace("{success_rate}", f"{success_rate:.1f}")
        prompt = prompt.replace("{data_json}", data_json)
        return prompt
    # ── Model Call ────────────────────────────────────────────────────────
    def _call_model(self, prompt: str) -> Optional[str]:
        """Call the active model with automatic failover.
        Resolution order:
        1. Primary model (glm-5.1 via zai)
        2. Fallback model (Qwen3.6 via local) if primary fails
        Health check: when on fallback, probes primary every 30 min
        and switches back when it recovers.
        """
        # Stash the prompt; _call_chat_completions reads it back from self.
        self._current_prompt = prompt
        active_cfg, is_fallback = _get_active_text_config(self.config)
        base_url = active_cfg.get("base_url", "")
        api_key = active_cfg.get("api_key", "")
        model = active_cfg.get("model", "")
        if not base_url or not model:
            logger.warning("Incomplete runtime config: base_url=%s model=%s",
                           bool(base_url), model)
            return None
        result = self._call_chat_completions(base_url, api_key, model)
        # If primary failed, try fallback
        if result is None and not is_fallback:
            fallback = self.config.get("fallback", {})
            if fallback.get("base_url") and fallback.get("model"):
                logger.warning("Primary model failed, trying fallback: %s",
                               fallback.get("model"))
                result = self._call_chat_completions(
                    fallback["base_url"], fallback.get("api_key", ""),
                    fallback["model"],
                )
                if result is not None:
                    # Persist the switch so subsequent calls start on the fallback.
                    _switch_to_fallback()
        return result
    def _call_chat_completions(
        self, base_url: str, api_key: str, model: str,
    ) -> Optional[str]:
        """Call OpenAI-compatible /chat/completions endpoint.

        Uses the prompt stashed in self._current_prompt by _call_model.
        Returns the first choice's content, or None on any failure.
        """
        try:
            import requests  # third-party; imported at call time, not module load
            url = f"{base_url.rstrip('/')}/chat/completions"
            headers = {"Content-Type": "application/json"}
            if api_key:
                headers["Authorization"] = f"Bearer {api_key}"
            resp = requests.post(
                url,
                headers=headers,
                json={
                    "model": model,
                    "messages": [
                        {"role": "system", "content": _SYSTEM_PROMPT},
                        {"role": "user", "content": self._current_prompt or ""},
                    ],
                    "temperature": 0.3,
                },
                timeout=300,
            )
            if resp.status_code == 200:
                data = resp.json()
                return data.get("choices", [{}])[0].get("message", {}).get("content", "")
            else:
                logger.debug("Model call failed: %d %s", resp.status_code, resp.text[:200])
        except Exception as exc:
            logger.debug("Chat completions call failed: %s", exc)
        # Non-200 responses and exceptions both fall through to None.
        return None
    # ── Multimodal Call ───────────────────────────────────────────────────
    def call_multimodal(self, prompt: str, images: Optional[list] = None) -> Optional[str]:
        """Call multimodal model with text and optional images.
        Routes to local multimodal model (gemma-4-26b-a4b-it-4bit) when
        images are involved. Falls back to text model if no images.
        Args:
            prompt: Text prompt.
            images: List of image data, each item is either:
                - URL string (http/https/data:image)
                - bytes (raw image data, auto-encoded to base64)
        Returns:
            Model response text, or None on failure.
        """
        mm = self.config.get("multimodal", {})
        if not mm or not mm.get("base_url"):
            logger.debug("No multimodal model configured, falling back to text")
            return self._call_model(prompt)
        # Build content with images
        content = [{"type": "text", "text": prompt}]
        for img in (images or []):
            if isinstance(img, bytes):
                import base64
                b64 = base64.b64encode(img).decode()
                # NOTE(review): raw bytes are always labeled image/png regardless
                # of actual format — confirm callers only pass PNG data.
                content.append({
                    "type": "image_url",
                    "image_url": {"url": f"data:image/png;base64,{b64}"},
                })
            elif isinstance(img, str):
                content.append({
                    "type": "image_url",
                    "image_url": {"url": img},
                })
        try:
            from openai import OpenAI  # third-party; imported at call time
            # Normalize base_url so it ends with exactly one "/v1" suffix.
            client = OpenAI(
                base_url=mm["base_url"].rstrip("/") + ("/v1" if not mm["base_url"].rstrip("/").endswith("/v1") else ""),
                api_key=mm.get("api_key") or "no-key",
            )
            resp = client.chat.completions.create(
                model=mm["model"],
                messages=[{"role": "user", "content": content}],
                temperature=0.3,
                max_tokens=2000,
                timeout=120,
            )
            return resp.choices[0].message.content
        except Exception as exc:
            logger.debug("Multimodal call failed: %s", exc)
            return None
    # ── Reflection Parsing ────────────────────────────────────────────────
    def _parse_reflection(
        self,
        reflection_text: str,
        period_start: float,
        period_end: float,
        sessions_analyzed: int,
        avg_score: float,
        error_analysis: ErrorAnalysis,
        waste_analysis: WasteAnalysis,
        code_analysis: Optional[CodeChangeAnalysis] = None,
    ) -> ReflectionReport:
        """Parse model output into structured ReflectionReport.
        Extraction cascade:
        1. Direct JSON parse
        2. Strip markdown ```json ... ``` wrapper, retry JSON
        3. Extract JSON object via regex (handle trailing text)
        4. Text-based section extraction (fallback)
        """
        worst_patterns = []
        best_patterns = []
        recommendations = []
        tool_insights = {}
        text = reflection_text.strip()
        # 1. Direct JSON parse
        data = _try_parse_json(text)
        if data is None:
            # 2. Strip markdown wrapper
            m = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', text, re.DOTALL)
            if m:
                data = _try_parse_json(m.group(1))
        if data is None:
            # 3. Regex extract first JSON object
            m = re.search(r'\{[^{}]*"(?:worst|best|recommendations)"[^{}]*\}', text, re.DOTALL)
            if m:
                data = _try_parse_json(m.group(0))
        if data is None:
            # 3.5. Broader regex — find outermost braces
            start = text.find('{')
            end = text.rfind('}')
            if start != -1 and end > start:
                data = _try_parse_json(text[start:end + 1])
        if data is not None:
            worst_patterns = data.get("worst_patterns") or []
            best_patterns = data.get("best_patterns") or []
            recommendations = data.get("recommendations") or []
            tool_insights = data.get("tool_insights") or {}
        else:
            # 4. Text-based extraction: scan line by line, tracking which
            # section header was seen last, and collect list items into it.
            section = None
            for line in text.split("\n"):
                stripped = line.strip()
                lower = stripped.lower()
                if ("worst" in lower and "pattern" in lower) or "最差" in stripped or "错误模式" in stripped:
                    section = "worst"
                elif ("best" in lower and "pattern" in lower) or "最佳" in stripped or "成功" in stripped:
                    section = "best"
                elif ("recommend" in lower) or "建议" in stripped:
                    section = "rec"
                # NOTE(review): startswith("") is always True, so this branch
                # captures every remaining line and the numbered-list branch
                # below is unreachable — a bullet prefix (e.g. "• ") appears
                # to have been lost here; confirm against the original source.
                elif stripped.startswith("- ") or stripped.startswith("* ") or stripped.startswith(""):
                    item = stripped.lstrip("-*• ").strip()
                    if section == "worst":
                        worst_patterns.append(item)
                    elif section == "best":
                        best_patterns.append(item)
                    elif section == "rec":
                        recommendations.append(item)
                elif len(stripped) > 2 and stripped[0].isdigit() and stripped[1] in ".)" and stripped[2] == " ":
                    item = stripped[3:].strip()
                    if section == "worst":
                        worst_patterns.append(item)
                    elif section == "best":
                        best_patterns.append(item)
                    elif section == "rec":
                        recommendations.append(item)
        return ReflectionReport(
            period_start=period_start,
            period_end=period_end,
            sessions_analyzed=sessions_analyzed,
            avg_score=avg_score,
            error_summary=error_analysis.summary(),
            waste_summary=waste_analysis.summary(),
            worst_patterns=worst_patterns,
            best_patterns=best_patterns,
            tool_insights=tool_insights,
            recommendations=recommendations,
            code_change_summary=code_analysis.summary() if code_analysis else "",
            # NOTE(review): _call_model reads the model name from the nested
            # active config, so a top-level "model" key may not exist here and
            # this would record "unknown" — confirm the intended lookup.
            model_used=self.config.get("model", "unknown"),
        )
# ── Default Prompt Template ──────────────────────────────────────────────
# System prompt: instructs the model to return strict JSON (no markdown) in
# the schema consumed by _parse_reflection (worst_patterns / best_patterns /
# tool_insights / recommendations).
_SYSTEM_PROMPT = (
    "你是 Hermes Agent 性能分析引擎。分析运行数据+代码变更输出严格JSON无markdown\n"
    "格式:\n"
    '{"worst_patterns":["模式(工具+场景+根因)"],"best_patterns":["成功经验"],'
    '"tool_insights":{"工具":{"sr":0.95,"ms":500,"rec":"建议"}},'
    '"recommendations":["做什么|效果|风险(l/m/h)|验证"]}\n'
    "重点:系统性错误>偶发,错误连锁,策略vs工具问题,重复操作,代码设计合理性,自我进化状态,"
    "可固化流程。≤5条建议,优先高影响低风险。无数据时输出空数组。"
)
# Fallback user prompt used when prompts/reflection.md is absent; the
# {placeholders} are filled by _build_reflection_prompt via str.replace.
# NOTE(review): this inline copy differs slightly from prompts/reflection.md
# ("Session 数" / "次" appear truncated here) — confirm against the original.
_DEFAULT_REFLECTION_PROMPT = """## 概况
- 时段: {period_range}
- Session : {sessions_count}, 平均质量: {avg_score}
- 工具调用: {total_invocations} , 成功率 {success_rate}%
## 数据
{data_json}
"""
def _try_parse_json(text: str) -> Optional[dict]:
"""Try to parse JSON, returning None on any failure."""
try:
data = json.loads(text)
if isinstance(data, dict):
return data
except (json.JSONDecodeError, ValueError):
pass
return None

View file

@ -0,0 +1,101 @@
"""
Self Evolution Plugin Rule Engine (Strategy Matching)
========================================================
Conditional strategy matching engine.
Design reference: Claude Code plugins/hookify/core/rule_engine.py
- LRU-cached regex compilation (max 128)
- Multiple operators: regex_match, contains, equals, not_contains
- All conditions must match (AND logic)
- Severity levels: high, medium, low
"""
from __future__ import annotations
import re
from functools import lru_cache
from typing import Any, Dict, List, Optional
from self_evolution.models import StrategyRule, StrategyCondition
@lru_cache(maxsize=128)
def _compile_pattern(pattern: str) -> re.Pattern:
"""Compile and cache a regex pattern."""
return re.compile(pattern, re.IGNORECASE)
class StrategyRuleEngine:
    """Evaluate strategy rules against session context."""

    def match_strategies(
        self,
        strategies: List[StrategyRule],
        context: Dict[str, Any],
    ) -> List[StrategyRule]:
        """Return enabled strategies whose conditions all match the context.

        A strategy with an empty condition list matches unconditionally.
        """
        selected: List[StrategyRule] = []
        for rule in strategies:
            if not rule.enabled:
                continue
            if not rule.conditions or self._conditions_match(rule.conditions, context):
                selected.append(rule)
        return selected

    def _conditions_match(
        self,
        conditions: List[StrategyCondition],
        context: Dict[str, Any],
    ) -> bool:
        """AND semantics: every condition must hold for its context field."""
        return all(
            self._check_operator(cond.operator, cond.pattern, str(context.get(cond.field, "")))
            for cond in conditions
        )

    def _check_operator(self, op: str, pattern: str, value: str) -> bool:
        """Evaluate one operator; unknown operators and invalid regexes are False."""
        try:
            if op == "regex_match":
                return _compile_pattern(pattern).search(value) is not None
            if op == "contains":
                return pattern in value
            if op == "equals":
                return pattern == value
            if op == "not_contains":
                return pattern not in value
            if op == "starts_with":
                return value.startswith(pattern)
            if op == "ends_with":
                return value.endswith(pattern)
            return False
        except re.error:
            return False

    def format_hints(self, strategies: List[StrategyRule], max_chars: int = 0) -> str:
        """Format matched strategies into a system hint string.

        Args:
            max_chars: If > 0, stop adding lines once the joined output
                would exceed this many characters.
        """
        if not strategies:
            return ""
        lines = ["[自我进化策略提示]"]
        icons = {"hint": "💡", "avoid": "⚠️", "prefer": ""}
        for rule in strategies:
            prefix = icons.get(rule.strategy_type, "💡")
            entry = f"{prefix} {rule.name}: {rule.hint_text}"
            if max_chars and len("\n".join(lines)) + len(entry) > max_chars:
                break
            lines.append(entry)
        return "\n".join(lines)

View file

@ -0,0 +1,141 @@
"""
Self Evolution Plugin Strategy Compressor
=============================================
Compresses and merges redundant strategy rules into concise hints.
Called after dream consolidation to keep strategies.json compact.
Each hint_text must be 30 chars; strategies without conditions are
either merged into conditional rules or discarded.
"""
from __future__ import annotations
import logging
import re
from typing import Any, Dict, List
logger = logging.getLogger(__name__)

# Maximum allowed length for hint_text (characters).
MAX_HINT_LENGTH = 30

# Keyword clusters used to group similar strategies: any rule whose name or
# hint_text contains one of a cluster's keywords is folded into that
# cluster's single canonical rule (fixed hint + injection condition).
_CLUSTERS: List[Dict[str, Any]] = [
    {
        "keywords": ["bash", "路径", "path", "校验", "预检", "验证", "存在"],
        "hint": "bash前先read验证路径",
        "condition": {"field": "tool_name", "operator": "contains", "pattern": "bash"},
    },
    {
        "keywords": ["api", "调试", "debug", "降级", "只读", "探查"],
        "hint": "API失败时降级只读探查",
        "condition": {"field": "task_type", "operator": "contains", "pattern": "api"},
    },
    {
        "keywords": ["browser", "浏览器", "timeout", "超时", "网页"],
        "hint": "浏览器操作设置超时保护",
        "condition": {"field": "tool_name", "operator": "contains", "pattern": "browser"},
    },
    {
        # Empty condition: the merged rule built from this cluster ends up
        # with no conditions and is therefore dropped by compress_strategies.
        "keywords": ["重试", "retry", "浪费", "重复", "循环"],
        "hint": "避免重复重试相同操作",
        "condition": {},
    },
]


def compress_strategies(rules: List[dict]) -> List[dict]:
    """Compress strategy rules by merging similar ones.

    Pipeline:
      1. Deduplicate rules by case-insensitive hint_text.
      2. Merge keyword-similar rules into one canonical rule per cluster.
      3. Drop rules without conditions; strip and truncate hint_text to
         MAX_HINT_LENGTH characters.
      4. Re-add manual/default rules that carry conditions, if missing.

    NOTE: rule dicts that survive step 3 are mutated in place (hint_text
    is stripped/truncated), so callers should not rely on the originals.

    Args:
        rules: Raw strategy rule dicts as stored in strategies.json.

    Returns:
        A new, compacted list of rule dicts.
    """
    if not rules:
        return []
    # 1. Deduplicate by hint_text (case-insensitive, empty hints skipped).
    seen_hints: set[str] = set()
    unique: list[dict] = []
    for r in rules:
        key = r.get("hint_text", "").strip().lower()
        if key and key not in seen_hints:
            seen_hints.add(key)
            unique.append(r)
    # 2. Cluster similar rules.
    clustered = _cluster_rules(unique)
    # 3. Enforce constraints: hint_text <= MAX_HINT_LENGTH, must have conditions.
    result: list[dict] = []
    for r in clustered:
        hint = r.get("hint_text", "").strip()
        conditions = r.get("conditions", [])
        # Skip rules without conditions (they won't be injected anyway).
        if not conditions:
            logger.debug("Dropping unconditioned strategy: %s", hint[:40])
            continue
        # Truncate hint if needed.
        if len(hint) > MAX_HINT_LENGTH:
            hint = hint[:MAX_HINT_LENGTH]
        r["hint_text"] = hint
        result.append(r)
    # 4. Also keep any manual/default rules that already have conditions.
    for r in unique:
        if r.get("source") in ("manual", "default") and r.get("conditions"):
            if r not in result:
                hint = r.get("hint_text", "").strip()
                if len(hint) > MAX_HINT_LENGTH:
                    r["hint_text"] = hint[:MAX_HINT_LENGTH]
                result.append(r)
    # BUGFIX: the format string was "%d%d" (the separator was lost),
    # logging an unreadable fused number; restore the before/after arrow.
    logger.info("Compressed strategies: %d -> %d rules", len(rules), len(result))
    return result


def _cluster_rules(rules: list[dict]) -> list[dict]:
    """Group rules by keyword clusters and merge each group into one rule.

    A rule joins a cluster when its name or hint_text contains any of the
    cluster's keywords.  Each non-empty group collapses into one rule that
    carries the cluster's canonical hint and condition; rules matching no
    cluster are passed through unchanged.
    """
    matched_indices: set[int] = set()
    merged: list[dict] = []
    for cluster in _CLUSTERS:
        group: list[dict] = []
        for i, r in enumerate(rules):
            text = f"{r.get('name', '')} {r.get('hint_text', '')}".lower()
            if any(kw in text for kw in cluster["keywords"]):
                group.append(r)
                matched_indices.add(i)
        if not group:
            continue
        # Merge the group into one canonical rule; keep the first member's
        # id and created_at so the merged rule retains a stable identity.
        first = group[0]
        condition = cluster.get("condition")
        merged.append(
            {
                "id": first.get("id", ""),
                "name": cluster["hint"],
                "type": "learned",
                "description": cluster["hint"],
                "hint_text": cluster["hint"],
                "conditions": [condition] if condition else [],
                "severity": "medium",
                "enabled": True,
                "source": "learned",
                "created_at": first.get("created_at", 0),
            }
        )
    # Pass through rules that matched no cluster.
    for i, r in enumerate(rules):
        if i not in matched_indices:
            merged.append(r)
    return merged

View file

@ -0,0 +1,124 @@
"""
Self Evolution Plugin Strategy Injector
===========================================
Injects learned strategy hints into sessions via pre_llm_call hook.
Design reference: Claude Code plugins/learning-output-style/
- SessionStart hook injects behavioral context automatically
- Equivalent to CLAUDE.md but more flexible and distributable
- No core modification needed
"""
from __future__ import annotations
import logging
import time
from typing import Any, Dict, Optional
from self_evolution.models import StrategyRule
from self_evolution.rule_engine import StrategyRuleEngine
logger = logging.getLogger(__name__)
# Shared engine used to match strategies against context and format hints.
_engine = StrategyRuleEngine()
# ── TTL-based cache to avoid reading strategies.json on every LLM call ────
_cached_strategies: list | None = None  # enabled StrategyRule objects, or None when empty/invalidated
_cache_ts: float = 0.0  # time.time() of the last successful load
_CACHE_TTL: float = 60.0  # seconds
def _load_active_strategies() -> list:
    """Return enabled StrategyRule objects, cached for _CACHE_TTL seconds.

    strategies.json is re-read via StrategyStore only when the cache is
    empty or older than the TTL, keeping the pre_llm_call hook cheap.
    """
    global _cached_strategies, _cache_ts
    now = time.time()
    cache_is_fresh = (
        _cached_strategies is not None and (now - _cache_ts) < _CACHE_TTL
    )
    if cache_is_fresh:
        return _cached_strategies
    from self_evolution.strategy_store import StrategyStore

    data = StrategyStore().load()
    loaded = [
        StrategyRule.from_dict(rule_data)
        for rule_data in data.get("rules", [])
        if rule_data.get("enabled", True)
    ]
    _cached_strategies = loaded
    _cache_ts = now
    return loaded
def invalidate_cache():
    """Drop the cached strategy list so the next load re-reads the store.

    Call this after any strategy update; the stale timestamp alone is not
    checked once the cached list is None.
    """
    global _cached_strategies
    _cached_strategies = None
_MAX_INJECT_STRATEGIES = 3  # maximum number of strategies injected per call
_MAX_HINT_CHARS = 100  # total character budget for the injected hint block
_MAX_SINGLE_HINT = 30  # maximum characters allowed for a single hint_text
def inject_hints(kwargs: dict) -> Optional[str]:
    """Pre-llm-call hook: inject learned strategy hints.

    Rules:
    - Strategies without conditions are skipped (must be condition-based).
    - hint_text longer than _MAX_SINGLE_HINT chars are skipped.
    - At most _MAX_INJECT_STRATEGIES hints, total _MAX_HINT_CHARS.

    Returns:
        The formatted hint string, or None when nothing applies.
    """
    strategies = _load_active_strategies()
    if not strategies:
        return None
    # Match loaded strategies against the current session context.
    matched = _engine.match_strategies(strategies, _build_context(kwargs))
    if not matched:
        return None
    # Single pass: keep condition-based strategies whose hint fits the
    # per-hint cap, deduplicated by normalized hint text, in match order.
    seen_keys: set[str] = set()
    selected: list = []
    for strat in matched:
        if not strat.conditions:
            continue  # must be condition-based
        hint = strat.hint_text.strip()
        if len(hint) > _MAX_SINGLE_HINT:
            continue  # overly long hint
        key = hint.lower()
        if key in seen_keys:
            continue  # duplicate hint content
        seen_keys.add(key)
        selected.append(strat)
    if not selected:
        return None
    # Cap the count, then format within the total character budget.
    return _engine.format_hints(
        selected[:_MAX_INJECT_STRATEGIES], max_chars=_MAX_HINT_CHARS
    )
def _build_context(kwargs: dict) -> dict:
"""Build matching context from hook kwargs."""
return {
"platform": kwargs.get("platform", ""),
"model": kwargs.get("model", ""),
"task_type": kwargs.get("task_type", ""),
"tool_name": kwargs.get("tool_name", ""),
}

View file

@ -0,0 +1,72 @@
"""
Self Evolution Plugin Strategy Store
========================================
Manages strategy rules with version history and rollback support.
Strategies stored at ~/.hermes/self_evolution/strategies.json
Archives at ~/.hermes/self_evolution/archive/strategies_v{N}.json
"""
from __future__ import annotations
import json
import logging
from pathlib import Path
from typing import Any, Dict, Optional
logger = logging.getLogger(__name__)

# All file locations come from the shared paths module so every plugin
# component agrees on where strategies and archives live.
from self_evolution.paths import DATA_DIR as STRATEGIES_DIR, STRATEGIES_FILE, ARCHIVE_DIR
class StrategyStore:
    """Load, save, and version strategy rules.

    The current rule set lives at STRATEGIES_FILE; archived versions for
    rollback live under ARCHIVE_DIR as strategies_v{N}.json.
    """

    def load(self) -> dict:
        """Load current strategies.

        Returns:
            The parsed strategy document, or {"version": 0, "rules": []}
            when the file is missing or unreadable.
        """
        if not STRATEGIES_FILE.exists():
            return {"version": 0, "rules": []}
        try:
            return json.loads(STRATEGIES_FILE.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, OSError):
            # Log instead of silently resetting so corruption is visible;
            # the empty default preserves the original fallback behavior.
            logger.warning(
                "Failed to read %s; using empty strategy set", STRATEGIES_FILE
            )
            return {"version": 0, "rules": []}

    def save(self, data: dict):
        """Save strategies to file atomically.

        Writes to a temporary sibling file and renames it over the target,
        so a crash mid-write cannot leave a truncated strategies.json.
        """
        STRATEGIES_DIR.mkdir(parents=True, exist_ok=True)
        tmp_path = STRATEGIES_FILE.with_name(STRATEGIES_FILE.name + ".tmp")
        tmp_path.write_text(
            json.dumps(data, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )
        # Path.replace is an atomic rename on POSIX and overwrites on Windows.
        tmp_path.replace(STRATEGIES_FILE)

    def archive(self, version: int):
        """Archive current strategies for rollback as strategies_v{version}.json."""
        if not STRATEGIES_FILE.exists():
            return
        ARCHIVE_DIR.mkdir(parents=True, exist_ok=True)
        archive_path = ARCHIVE_DIR / f"strategies_v{version}.json"
        archive_path.write_text(
            STRATEGIES_FILE.read_text(encoding="utf-8"),
            encoding="utf-8",
        )
        logger.info("Archived strategies version %d", version)

    def load_archive(self, version: int) -> Optional[dict]:
        """Load an archived version, or None when missing or unreadable."""
        archive_path = ARCHIVE_DIR / f"strategies_v{version}.json"
        if not archive_path.exists():
            return None
        try:
            return json.loads(archive_path.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, OSError):
            return None

    def restore(self, data: dict):
        """Restore strategies from an archive (overwrites the current file)."""
        self.save(data)
        logger.info("Restored strategies from archive")

    def get_version(self) -> int:
        """Get current version number (0 when no strategies file exists)."""
        return self.load().get("version", 0)

1253
tests/test_self_evolution.py Normal file

File diff suppressed because it is too large Load diff