mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Merge 3cd384dc43 into 13038dc747
This commit is contained in:
commit
c90dd4860f
23 changed files with 6173 additions and 0 deletions
911
docs/self-evolution-design.html
Normal file
911
docs/self-evolution-design.html
Normal file
|
|
@ -0,0 +1,911 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Hermes Agent 自我优化与持续进化系统设计</title>
|
||||
<style>
|
||||
:root {
|
||||
--bg: #0f1117;
|
||||
--bg-card: #1a1d2e;
|
||||
--bg-card2: #232740;
|
||||
--border: #2d3250;
|
||||
--text: #e2e8f0;
|
||||
--text-dim: #94a3b8;
|
||||
--accent: #6366f1;
|
||||
--accent2: #8b5cf6;
|
||||
--green: #10b981;
|
||||
--green-dim: rgba(16,185,129,0.15);
|
||||
--amber: #f59e0b;
|
||||
--amber-dim: rgba(245,158,11,0.15);
|
||||
--red: #ef4444;
|
||||
--red-dim: rgba(239,68,68,0.15);
|
||||
--blue: #3b82f6;
|
||||
--blue-dim: rgba(59,130,246,0.15);
|
||||
--cyan: #06b6d4;
|
||||
--pink: #ec4899;
|
||||
}
|
||||
* { margin:0; padding:0; box-sizing:border-box; }
|
||||
body {
|
||||
background: var(--bg);
|
||||
color: var(--text);
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
|
||||
line-height: 1.6;
|
||||
padding: 2rem;
|
||||
max-width: 1200px;
|
||||
margin: 0 auto;
|
||||
}
|
||||
h1 { font-size: 2rem; font-weight: 700; margin-bottom: 0.5rem; }
|
||||
h2 { font-size: 1.5rem; font-weight: 600; margin: 2.5rem 0 1rem; color: var(--accent); }
|
||||
h3 { font-size: 1.15rem; font-weight: 600; margin: 1.5rem 0 0.75rem; }
|
||||
p { color: var(--text-dim); margin-bottom: 1rem; }
|
||||
.subtitle { color: var(--text-dim); font-size: 1.05rem; margin-bottom: 2rem; }
|
||||
|
||||
/* Hero */
|
||||
.hero {
|
||||
background: linear-gradient(135deg, #1e1b4b 0%, #0f172a 50%, #0c1220 100%);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 16px;
|
||||
padding: 3rem;
|
||||
margin-bottom: 2rem;
|
||||
position: relative;
|
||||
overflow: hidden;
|
||||
}
|
||||
.hero::before {
|
||||
content: '';
|
||||
position: absolute;
|
||||
top: -50%;
|
||||
right: -20%;
|
||||
width: 500px;
|
||||
height: 500px;
|
||||
background: radial-gradient(circle, rgba(99,102,241,0.12) 0%, transparent 70%);
|
||||
pointer-events: none;
|
||||
}
|
||||
.hero h1 { position: relative; }
|
||||
.hero .subtitle { position: relative; }
|
||||
.badge-row { display: flex; gap: 0.5rem; flex-wrap: wrap; margin-top: 1.5rem; position: relative; }
|
||||
.badge {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: 0.35rem;
|
||||
padding: 0.3rem 0.75rem;
|
||||
border-radius: 999px;
|
||||
font-size: 0.8rem;
|
||||
font-weight: 500;
|
||||
}
|
||||
.badge-purple { background: rgba(139,92,246,0.15); color: #a78bfa; border: 1px solid rgba(139,92,246,0.25); }
|
||||
.badge-green { background: var(--green-dim); color: var(--green); border: 1px solid rgba(16,185,129,0.25); }
|
||||
.badge-blue { background: var(--blue-dim); color: var(--blue); border: 1px solid rgba(59,130,246,0.25); }
|
||||
.badge-amber { background: var(--amber-dim); color: var(--amber); border: 1px solid rgba(245,158,11,0.25); }
|
||||
|
||||
/* Cards */
|
||||
.card {
|
||||
background: var(--bg-card);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 12px;
|
||||
padding: 1.5rem;
|
||||
margin-bottom: 1.5rem;
|
||||
}
|
||||
.card-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(320px, 1fr)); gap: 1.5rem; }
|
||||
|
||||
/* Architecture Diagram */
|
||||
.arch-container {
|
||||
background: var(--bg-card);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 16px;
|
||||
padding: 2rem;
|
||||
margin: 2rem 0;
|
||||
overflow-x: auto;
|
||||
}
|
||||
.arch-flow {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
gap: 0.5rem;
|
||||
flex-wrap: wrap;
|
||||
min-width: 700px;
|
||||
}
|
||||
.arch-node {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
gap: 0.35rem;
|
||||
padding: 1rem 1.25rem;
|
||||
border-radius: 12px;
|
||||
min-width: 110px;
|
||||
text-align: center;
|
||||
position: relative;
|
||||
transition: transform 0.2s;
|
||||
}
|
||||
.arch-node:hover { transform: translateY(-3px); }
|
||||
.arch-node .icon { font-size: 1.5rem; }
|
||||
.arch-node .label { font-size: 0.85rem; font-weight: 600; }
|
||||
.arch-node .desc { font-size: 0.7rem; color: var(--text-dim); }
|
||||
|
||||
.node-observe { background: var(--blue-dim); border: 1px solid rgba(59,130,246,0.3); }
|
||||
.node-evaluate { background: rgba(139,92,246,0.12); border: 1px solid rgba(139,92,246,0.3); }
|
||||
.node-reflect { background: rgba(6,182,212,0.12); border: 1px solid rgba(6,182,212,0.3); }
|
||||
.node-learn { background: var(--green-dim); border: 1px solid rgba(16,185,129,0.3); }
|
||||
.node-evolve { background: var(--amber-dim); border: 1px solid rgba(245,158,11,0.3); }
|
||||
.node-data { background: rgba(236,72,153,0.1); border: 1px solid rgba(236,72,153,0.25); }
|
||||
|
||||
.arch-arrow {
|
||||
font-size: 1.5rem;
|
||||
color: var(--text-dim);
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
/* Timeline */
|
||||
.timeline {
|
||||
position: relative;
|
||||
padding-left: 2.5rem;
|
||||
margin: 2rem 0;
|
||||
}
|
||||
.timeline::before {
|
||||
content: '';
|
||||
position: absolute;
|
||||
left: 0.75rem;
|
||||
top: 0;
|
||||
bottom: 0;
|
||||
width: 2px;
|
||||
background: linear-gradient(to bottom, var(--accent), var(--cyan), var(--green), var(--amber));
|
||||
}
|
||||
.tl-item {
|
||||
position: relative;
|
||||
margin-bottom: 2rem;
|
||||
padding: 1.25rem 1.5rem;
|
||||
background: var(--bg-card);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 12px;
|
||||
}
|
||||
.tl-item::before {
|
||||
content: '';
|
||||
position: absolute;
|
||||
left: -2.05rem;
|
||||
top: 1.4rem;
|
||||
width: 12px;
|
||||
height: 12px;
|
||||
border-radius: 50%;
|
||||
border: 2px solid var(--accent);
|
||||
background: var(--bg);
|
||||
}
|
||||
.tl-item.night::before { border-color: var(--cyan); }
|
||||
.tl-item.morning::before { border-color: var(--green); }
|
||||
.tl-item.action::before { border-color: var(--amber); }
|
||||
.tl-item .tl-time {
|
||||
font-size: 0.8rem;
|
||||
font-weight: 600;
|
||||
color: var(--cyan);
|
||||
margin-bottom: 0.35rem;
|
||||
}
|
||||
.tl-item.morning .tl-time { color: var(--green); }
|
||||
.tl-item.action .tl-time { color: var(--amber); }
|
||||
.tl-item .tl-title { font-weight: 600; margin-bottom: 0.5rem; }
|
||||
.tl-item .tl-desc { font-size: 0.9rem; color: var(--text-dim); }
|
||||
|
||||
/* Flowchart-style dream */
|
||||
.flow-box {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
.flow-step {
|
||||
display: flex;
|
||||
align-items: flex-start;
|
||||
gap: 1rem;
|
||||
padding: 1rem;
|
||||
background: var(--bg-card2);
|
||||
border-radius: 8px;
|
||||
border-left: 3px solid var(--accent);
|
||||
}
|
||||
.flow-step.step-error { border-left-color: var(--red); }
|
||||
.flow-step.step-waste { border-left-color: var(--amber); }
|
||||
.flow-step.step-model { border-left-color: var(--cyan); }
|
||||
.flow-step.step-output { border-left-color: var(--green); }
|
||||
.flow-step .step-num {
|
||||
flex-shrink: 0;
|
||||
width: 28px;
|
||||
height: 28px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
border-radius: 50%;
|
||||
background: var(--accent);
|
||||
color: #fff;
|
||||
font-size: 0.8rem;
|
||||
font-weight: 700;
|
||||
}
|
||||
.flow-step.step-error .step-num { background: var(--red); }
|
||||
.flow-step.step-waste .step-num { background: var(--amber); }
|
||||
.flow-step.step-model .step-num { background: var(--cyan); }
|
||||
.flow-step.step-output .step-num { background: var(--green); }
|
||||
.flow-step .step-content { flex: 1; }
|
||||
.flow-step .step-title { font-weight: 600; font-size: 0.95rem; margin-bottom: 0.25rem; }
|
||||
.flow-step .step-desc { font-size: 0.85rem; color: var(--text-dim); }
|
||||
.flow-step ul { margin: 0.35rem 0 0 1rem; font-size: 0.85rem; color: var(--text-dim); }
|
||||
.flow-step li { margin-bottom: 0.15rem; }
|
||||
|
||||
/* Feishu mockup */
|
||||
.feishu-card {
|
||||
background: #fff;
|
||||
border-radius: 12px;
|
||||
padding: 1.5rem;
|
||||
color: #1f2937;
|
||||
max-width: 420px;
|
||||
margin: 1.5rem auto;
|
||||
box-shadow: 0 4px 24px rgba(0,0,0,0.3);
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
.feishu-card .fc-header {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
padding-bottom: 0.75rem;
|
||||
border-bottom: 1px solid #e5e7eb;
|
||||
margin-bottom: 0.75rem;
|
||||
}
|
||||
.feishu-card .fc-header .fc-icon {
|
||||
width: 32px; height: 32px;
|
||||
background: linear-gradient(135deg, #3b82f6, #8b5cf6);
|
||||
border-radius: 8px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
color: #fff;
|
||||
font-size: 1rem;
|
||||
}
|
||||
.feishu-card .fc-header .fc-title { font-weight: 600; }
|
||||
.feishu-card .fc-section { margin-bottom: 0.75rem; }
|
||||
.feishu-card .fc-section-title { font-weight: 600; font-size: 0.85rem; margin-bottom: 0.35rem; color: #374151; }
|
||||
.feishu-card .fc-row { display: flex; justify-content: space-between; font-size: 0.8rem; color: #6b7280; padding: 0.1rem 0; }
|
||||
.feishu-card .fc-proposal {
|
||||
background: #f9fafb;
|
||||
border-radius: 8px;
|
||||
padding: 0.75rem;
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
.feishu-card .fc-proposal-title { font-weight: 600; font-size: 0.85rem; margin-bottom: 0.25rem; }
|
||||
.feishu-card .fc-proposal-desc { font-size: 0.78rem; color: #6b7280; margin-bottom: 0.5rem; }
|
||||
.feishu-card .fc-btns { display: flex; gap: 0.5rem; }
|
||||
.feishu-card .fc-btn {
|
||||
padding: 0.3rem 0.75rem;
|
||||
border-radius: 6px;
|
||||
font-size: 0.78rem;
|
||||
font-weight: 500;
|
||||
border: none;
|
||||
cursor: pointer;
|
||||
}
|
||||
.fc-btn-approve { background: #3b82f6; color: #fff; }
|
||||
.fc-btn-modify { background: #f3f4f6; color: #374151; border: 1px solid #d1d5db; }
|
||||
.fc-btn-reject { background: #fef2f2; color: #ef4444; border: 1px solid #fecaca; }
|
||||
|
||||
/* Ref table */
|
||||
.ref-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 1rem; }
|
||||
.ref-card {
|
||||
background: var(--bg-card2);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 10px;
|
||||
padding: 1.25rem;
|
||||
}
|
||||
.ref-card .ref-source {
|
||||
font-size: 0.75rem;
|
||||
color: var(--cyan);
|
||||
margin-bottom: 0.5rem;
|
||||
font-family: 'SF Mono', monospace;
|
||||
}
|
||||
.ref-card .ref-title { font-weight: 600; margin-bottom: 0.5rem; }
|
||||
.ref-card .ref-desc { font-size: 0.85rem; color: var(--text-dim); }
|
||||
|
||||
/* DB schema */
|
||||
.db-table {
|
||||
background: var(--bg-card2);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 8px;
|
||||
padding: 1rem;
|
||||
margin-bottom: 1rem;
|
||||
font-family: 'SF Mono', 'Fira Code', monospace;
|
||||
font-size: 0.8rem;
|
||||
}
|
||||
.db-table .db-name {
|
||||
color: var(--cyan);
|
||||
font-weight: 700;
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
.db-table .db-col { color: var(--text-dim); padding: 0.1rem 0; }
|
||||
.db-table .db-col span { color: var(--amber); }
|
||||
|
||||
/* Safety */
|
||||
.safety-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(240px, 1fr)); gap: 1rem; }
|
||||
.safety-item {
|
||||
background: var(--bg-card2);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 10px;
|
||||
padding: 1.25rem;
|
||||
text-align: center;
|
||||
}
|
||||
.safety-item .safety-icon { font-size: 2rem; margin-bottom: 0.5rem; }
|
||||
.safety-item .safety-title { font-weight: 600; font-size: 0.95rem; margin-bottom: 0.35rem; }
|
||||
.safety-item .safety-desc { font-size: 0.82rem; color: var(--text-dim); }
|
||||
|
||||
/* File tree */
|
||||
.file-tree {
|
||||
font-family: 'SF Mono', 'Fira Code', monospace;
|
||||
font-size: 0.82rem;
|
||||
line-height: 1.8;
|
||||
color: var(--text-dim);
|
||||
background: var(--bg-card2);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 8px;
|
||||
padding: 1.25rem;
|
||||
overflow-x: auto;
|
||||
}
|
||||
.file-tree .dir { color: var(--cyan); font-weight: 600; }
|
||||
.file-tree .file { color: var(--text); }
|
||||
.file-tree .comment { color: var(--text-dim); font-style: italic; }
|
||||
|
||||
/* Quality formula */
|
||||
.formula {
|
||||
background: var(--bg-card2);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 10px;
|
||||
padding: 1.5rem 2rem;
|
||||
margin: 1rem 0;
|
||||
font-family: 'SF Mono', 'Fira Code', monospace;
|
||||
font-size: 0.88rem;
|
||||
text-align: center;
|
||||
line-height: 2;
|
||||
}
|
||||
.formula .w { color: var(--amber); }
|
||||
.formula .var { color: var(--cyan); }
|
||||
.formula .op { color: var(--text-dim); }
|
||||
|
||||
/* Integration table */
|
||||
.int-table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
margin: 1rem 0;
|
||||
font-size: 0.88rem;
|
||||
}
|
||||
.int-table th {
|
||||
text-align: left;
|
||||
padding: 0.75rem 1rem;
|
||||
background: var(--bg-card2);
|
||||
color: var(--text-dim);
|
||||
font-weight: 600;
|
||||
font-size: 0.8rem;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.05em;
|
||||
}
|
||||
.int-table td {
|
||||
padding: 0.65rem 1rem;
|
||||
border-bottom: 1px solid var(--border);
|
||||
}
|
||||
.int-table .hook {
|
||||
font-family: 'SF Mono', monospace;
|
||||
font-size: 0.8rem;
|
||||
color: var(--cyan);
|
||||
background: rgba(6,182,212,0.1);
|
||||
padding: 0.15rem 0.5rem;
|
||||
border-radius: 4px;
|
||||
}
|
||||
.int-table .no-mod { color: var(--green); }
|
||||
|
||||
/* Phase timeline */
|
||||
.phases { display: grid; grid-template-columns: repeat(4, 1fr); gap: 1rem; margin: 1.5rem 0; }
|
||||
.phase {
|
||||
background: var(--bg-card2);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 10px;
|
||||
padding: 1.25rem;
|
||||
position: relative;
|
||||
}
|
||||
.phase .phase-num {
|
||||
font-size: 2rem;
|
||||
font-weight: 800;
|
||||
color: var(--accent);
|
||||
opacity: 0.3;
|
||||
margin-bottom: 0.25rem;
|
||||
}
|
||||
.phase .phase-title { font-weight: 600; font-size: 0.95rem; margin-bottom: 0.5rem; }
|
||||
.phase ul { margin-left: 1rem; font-size: 0.82rem; color: var(--text-dim); }
|
||||
.phase li { margin-bottom: 0.25rem; }
|
||||
|
||||
/* Arrow connector between phases */
|
||||
.phase:not(:last-child)::after {
|
||||
content: '→';
|
||||
position: absolute;
|
||||
right: -1.2rem;
|
||||
top: 50%;
|
||||
transform: translateY(-50%);
|
||||
font-size: 1.5rem;
|
||||
color: var(--text-dim);
|
||||
}
|
||||
|
||||
/* Scrollbar */
|
||||
::-webkit-scrollbar { width: 6px; height: 6px; }
|
||||
::-webkit-scrollbar-track { background: transparent; }
|
||||
::-webkit-scrollbar-thumb { background: var(--border); border-radius: 3px; }
|
||||
|
||||
@media (max-width: 768px) {
|
||||
body { padding: 1rem; }
|
||||
.phases { grid-template-columns: 1fr 1fr; }
|
||||
.phase:not(:last-child)::after { display: none; }
|
||||
.arch-flow { flex-direction: column; }
|
||||
.arch-arrow { transform: rotate(90deg); }
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<!-- ═══════ Hero ═══════ -->
|
||||
<div class="hero">
|
||||
<h1>Hermes Agent 自我优化与持续进化系统</h1>
|
||||
<p class="subtitle">一套完全插件化的 agent 自我进化机制 — 通过每日"梦境整理"和"飞书审批流"实现闭环自我优化</p>
|
||||
<div class="badge-row">
|
||||
<span class="badge badge-purple">零侵入核心代码</span>
|
||||
<span class="badge badge-blue">完全插件化</span>
|
||||
<span class="badge badge-green">GLM-5.1 / Qwen 降级</span>
|
||||
<span class="badge badge-amber">飞书审批流</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- ═══════ Architecture ═══════ -->
|
||||
<h2>核心架构:五层闭环</h2>
|
||||
<p>观察 → 评估 → 反思 → 学习 → 进化,形成持续自我改进的闭环循环。</p>
|
||||
|
||||
<div class="arch-container">
|
||||
<div class="arch-flow">
|
||||
<div class="arch-node node-observe">
|
||||
<span class="icon">📡</span>
|
||||
<span class="label">观察</span>
|
||||
<span class="desc">遥测采集<br>post_tool_call</span>
|
||||
</div>
|
||||
<span class="arch-arrow">→</span>
|
||||
<div class="arch-node node-evaluate">
|
||||
<span class="icon">📊</span>
|
||||
<span class="label">评估</span>
|
||||
<span class="desc">质量评分<br>on_session_end</span>
|
||||
</div>
|
||||
<span class="arch-arrow">→</span>
|
||||
<div class="arch-node node-reflect">
|
||||
<span class="icon">🌙</span>
|
||||
<span class="label">反思</span>
|
||||
<span class="desc">梦境整理<br>凌晨 1:00</span>
|
||||
</div>
|
||||
<span class="arch-arrow">→</span>
|
||||
<div class="arch-node node-learn">
|
||||
<span class="icon">🧠</span>
|
||||
<span class="label">学习</span>
|
||||
<span class="desc">进化提案<br>策略生成</span>
|
||||
</div>
|
||||
<span class="arch-arrow">→</span>
|
||||
<div class="arch-node node-evolve">
|
||||
<span class="icon">🚀</span>
|
||||
<span class="label">进化</span>
|
||||
<span class="desc">飞书审批 → 执行<br>19:00 推送</span>
|
||||
</div>
|
||||
<span class="arch-arrow">↩</span>
|
||||
<div class="arch-node node-data">
|
||||
<span class="icon">💾</span>
|
||||
<span class="label">存储</span>
|
||||
<span class="desc">evolution.db<br>strategies.json</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- ═══════ Daily Flow ═══════ -->
|
||||
<h2>每日流程</h2>
|
||||
<p>从凌晨梦境整理到晚间飞书推送,一天的自动进化循环。</p>
|
||||
|
||||
<div class="timeline">
|
||||
<div class="tl-item night">
|
||||
<div class="tl-time">01:00 — 梦境整理(自动执行)</div>
|
||||
<div class="tl-title">DreamEngine.run() — 分析前日全部 session</div>
|
||||
<div class="flow-box" style="margin-top: 1rem;">
|
||||
<div class="flow-step">
|
||||
<div class="step-num">1</div>
|
||||
<div class="step-content">
|
||||
<div class="step-title">数据汇总</div>
|
||||
<div class="step-desc">读取 state.db(只读)+ evolution.db,计算各 session 质量评分</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="flow-step step-error">
|
||||
<div class="step-num">2</div>
|
||||
<div class="step-content">
|
||||
<div class="step-title">错误分析(重点)</div>
|
||||
<ul>
|
||||
<li>工具调用失败统计(按工具、按错误类型分布)</li>
|
||||
<li>反复重试检测(同一工具同一 session 调用 > 2次)</li>
|
||||
<li>未完成 session、用户纠正消息、API 错误</li>
|
||||
<li>错误连锁分析(一个失败是否引发后续失败)</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
<div class="flow-step step-waste">
|
||||
<div class="step-num">3</div>
|
||||
<div class="step-content">
|
||||
<div class="step-title">时间浪费分析(重点)</div>
|
||||
<ul>
|
||||
<li>耗时最长的工具调用 TOP 10</li>
|
||||
<li>重复操作(多次读同一文件、重复搜索)</li>
|
||||
<li>低效 session(迭代轮数过多、工具调用过多)</li>
|
||||
<li>可缩短的工具调用链</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
<div class="flow-step step-model">
|
||||
<div class="step-num">4</div>
|
||||
<div class="step-content">
|
||||
<div class="step-title">深度反思(GLM-5.1 优先 / Qwen 降级)</div>
|
||||
<div class="step-desc">将分析结果发送到本地模型,产出结构化 ReflectionReport:错误根因 + 浪费根因 + 可操作建议</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="flow-step step-output">
|
||||
<div class="step-num">5</div>
|
||||
<div class="step-content">
|
||||
<div class="step-title">模式识别 + 生成进化提案</div>
|
||||
<div class="step-desc">高成功率模式 → 候选技能 | 重复错误 → 候选规避策略 | 系统性浪费 → 候选流程优化</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="tl-item morning">
|
||||
<div class="tl-time">19:00 — 飞书推送进化方案</div>
|
||||
<div class="tl-title">FeishuNotifier.send_daily_report()</div>
|
||||
<div class="tl-desc">读取当日凌晨产出的 pending_approval 提案,格式化为飞书交互卡片推送给用户。</div>
|
||||
</div>
|
||||
|
||||
<div class="tl-item action">
|
||||
<div class="tl-time">用户审批后 — 执行进化</div>
|
||||
<div class="tl-title">EvolutionExecutor.execute()</div>
|
||||
<div class="tl-desc">飞书回调触发执行:技能创建 / 策略调整 / 记忆更新 / 工具偏好变更。执行后自动创建 A/B 测试追踪单元。</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- ═══════ Feishu Mockup ═══════ -->
|
||||
<h3>飞书卡片消息预览</h3>
|
||||
<div class="feishu-card">
|
||||
<div class="fc-header">
|
||||
<div class="fc-icon">🌅</div>
|
||||
<div>
|
||||
<div class="fc-title">Hermes 每日进化报告 (2026-04-18)</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="fc-section">
|
||||
<div class="fc-section-title">📊 前日概况</div>
|
||||
<div class="fc-row"><span>完成 sessions</span><span>23</span></div>
|
||||
<div class="fc-row"><span>平均质量评分</span><span>0.78 ↑0.03</span></div>
|
||||
<div class="fc-row"><span>工具调用 / 成功率</span><span>156次 / 91%</span></div>
|
||||
</div>
|
||||
<div class="fc-section">
|
||||
<div class="fc-section-title">❌ 错误分析</div>
|
||||
<div class="fc-row"><span>browser_tool 失败</span><span>5次 (超时3次)</span></div>
|
||||
<div class="fc-row"><span>未完成 session</span><span>2个</span></div>
|
||||
<div class="fc-row"><span>用户纠正</span><span>3次</span></div>
|
||||
</div>
|
||||
<div class="fc-section">
|
||||
<div class="fc-section-title">⏱️ 时间浪费分析</div>
|
||||
<div class="fc-row"><span>重复读取同一文件</span><span>8次</span></div>
|
||||
<div class="fc-row"><span>web_search→browser 冗余</span><span>6次</span></div>
|
||||
<div class="fc-row"><span>平均迭代轮数</span><span>12轮 (理想8轮)</span></div>
|
||||
</div>
|
||||
<hr style="border-color:#e5e7eb; margin:0.75rem 0;">
|
||||
<div class="fc-section">
|
||||
<div class="fc-section-title">📋 进化提案 (3项)</div>
|
||||
<div class="fc-proposal">
|
||||
<div class="fc-proposal-title">[1] 🛠️ 创建技能: web_search_pipeline</div>
|
||||
<div class="fc-proposal-desc">预期: 搜索任务成功率 +15% | 风险: low</div>
|
||||
<div class="fc-btns">
|
||||
<button class="fc-btn fc-btn-approve">通过</button>
|
||||
<button class="fc-btn fc-btn-modify">修改</button>
|
||||
<button class="fc-btn fc-btn-reject">拒绝</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="fc-proposal">
|
||||
<div class="fc-proposal-title">[2] ⚡ 策略调整: 优先 grep 替代 find</div>
|
||||
<div class="fc-proposal-desc">预期: 文件搜索效率 +25% | 风险: low</div>
|
||||
<div class="fc-btns">
|
||||
<button class="fc-btn fc-btn-approve">通过</button>
|
||||
<button class="fc-btn fc-btn-modify">修改</button>
|
||||
<button class="fc-btn fc-btn-reject">拒绝</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="fc-proposal">
|
||||
<div class="fc-proposal-title">[3] 🧠 记忆更新: 用户偏好中文回复</div>
|
||||
<div class="fc-proposal-desc">预期: 用户满意度提升 | 风险: low</div>
|
||||
<div class="fc-btns">
|
||||
<button class="fc-btn fc-btn-approve">通过</button>
|
||||
<button class="fc-btn fc-btn-modify">修改</button>
|
||||
<button class="fc-btn fc-btn-reject">拒绝</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- ═══════ Quality Score ═══════ -->
|
||||
<h2>质量评分体系</h2>
|
||||
<p>每个 session 结束时自动计算复合质量评分,零 API 成本。</p>
|
||||
|
||||
<div class="formula">
|
||||
<span class="var">session_quality</span> <span class="op">=</span>
|
||||
<span class="w">0.40</span> × <span class="var">completion_rate</span> <span class="op">+</span>
|
||||
<span class="w">0.20</span> × <span class="var">efficiency_score</span> <span class="op">+</span>
|
||||
<span class="w">0.15</span> × <span class="var">cost_efficiency</span> <span class="op">+</span>
|
||||
<span class="w">0.25</span> × <span class="var">satisfaction_proxy</span>
|
||||
</div>
|
||||
|
||||
<!-- Quality-score weight cards.
     Fix: the weight badges previously used color:var(--w), but no custom
     property named --w exists (the amber weight color is only defined via
     the .formula .w CLASS rule above). var(--w) therefore resolved to the
     inherited text color. Use var(--amber) directly so the badges match
     the weight coefficients shown in the formula block. -->
<div class="card-grid">
<div class="card">
<h3>completion_rate <span style="color:var(--amber);font-size:0.8rem;">权重 0.40</span></h3>
<p>任务是否完成。completed=1.0, interrupted=0.5, failed=0.0</p>
</div>
<div class="card">
<h3>efficiency_score <span style="color:var(--amber);font-size:0.8rem;">权重 0.20</span></h3>
<p>迭代效率。理想轮数 / 实际轮数,上限 1.0</p>
</div>
<div class="card">
<h3>cost_efficiency <span style="color:var(--amber);font-size:0.8rem;">权重 0.15</span></h3>
<p>工具使用效率。期望调用数 / 实际调用数,上限 1.0</p>
</div>
<div class="card">
<h3>satisfaction_proxy <span style="color:var(--amber);font-size:0.8rem;">权重 0.25</span></h3>
<p>满意度代理。单轮完成=0.9, 多轮完成=0.75, 预算耗尽=-0.2</p>
</div>
</div>
|
||||
|
||||
<!-- ═══════ Claude Code References ═══════ -->
|
||||
<h2>Claude Code 设计参考</h2>
|
||||
<p>本方案借鉴了 Claude Code 开源项目中的四个核心设计模式。</p>
|
||||
|
||||
<div class="ref-grid">
|
||||
<div class="ref-card">
|
||||
<div class="ref-source">plugins/hookify/agents/conversation-analyzer.md</div>
|
||||
<div class="ref-title">梦境整理 ← conversation-analyzer</div>
|
||||
<div class="ref-desc">
|
||||
分析对话历史 → 识别纠正/沮丧/重复问题信号 → 提取可匹配正则规则 → 按严重程度分级(高/中/低)。
|
||||
<br><br><b>我们的扩展</b>:从手动触发升级为每日自动运行,增加错误分析和时间浪费分析。
|
||||
</div>
|
||||
</div>
|
||||
<div class="ref-card">
|
||||
<div class="ref-source">plugins/ralph-wiggum/</div>
|
||||
<div class="ref-title">进化执行 ← Ralph Wiggum</div>
|
||||
<div class="ref-desc">
|
||||
自我引用反馈环:Stop hook 拦截退出 → 重喂 prompt → agent 看到自己的修改 → 自动迭代直到满足条件。
|
||||
<br><br><b>我们的扩展</b>:进化执行后创建验证追踪单元(类似 completion_promise),不满足条件自动回滚。
|
||||
</div>
|
||||
</div>
|
||||
<div class="ref-card">
|
||||
<div class="ref-source">plugins/learning-output-style/</div>
|
||||
<div class="ref-title">策略注入 ← SessionStart hook</div>
|
||||
<div class="ref-desc">
|
||||
通过 SessionStart hook 在每个 session 自动注入行为上下文,等效于 CLAUDE.md 但更灵活。
|
||||
<br><br><b>我们的扩展</b>:使用 pre_llm_call 钩子注入已学习的行为提示,完全隔离于核心代码。
|
||||
</div>
|
||||
</div>
|
||||
<div class="ref-card">
|
||||
<div class="ref-source">plugins/hookify/core/rule_engine.py</div>
|
||||
<div class="ref-title">规则引擎 ← rule_engine</div>
|
||||
<div class="ref-desc">
|
||||
LRU 缓存编译正则(128 上限),支持 regex_match/contains/equals/not_contains,区分 block/warn 级别。
|
||||
<br><br><b>我们的扩展</b>:策略注入条件化,根据 session 特征(平台/任务类型/模型)匹配最相关规则。
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- ═══════ Isolation ═══════ -->
|
||||
<h2>隔离策略:零侵入核心代码</h2>
|
||||
<p>所有功能以插件形式实现,通过钩子集成,不修改任何上游核心文件。</p>
|
||||
|
||||
<div class="card-grid">
|
||||
<div class="card">
|
||||
<h3>插件文件结构</h3>
|
||||
<div class="file-tree">
|
||||
<span class="dir">self_evolution/</span>
|
||||
├── plugin.yaml
|
||||
├── __init__.py <span class="comment"># register(ctx)</span>
|
||||
├── db.py <span class="comment"># 独立 SQLite</span>
|
||||
├── hooks.py <span class="comment"># 3个钩子</span>
|
||||
├── quality_scorer.py <span class="comment"># 质量评分</span>
|
||||
├── <span class="dir">reflection_engine.py</span> <span class="comment"># 梦境整理</span>
|
||||
├── rule_engine.py <span class="comment"># 条件匹配</span>
|
||||
├── evolution_proposer.py
|
||||
├── evolution_executor.py
|
||||
├── feishu_notifier.py
|
||||
├── strategy_injector.py
|
||||
├── strategy_store.py
|
||||
├── cron_jobs.py
|
||||
├── models.py
|
||||
├── <span class="dir">agents/</span>
|
||||
│ ├── dream_analyzer.md
|
||||
│ └── evolution_planner.md
|
||||
└── <span class="dir">prompts/</span>
|
||||
└── reflection.md
|
||||
</div>
|
||||
</div>
|
||||
<div class="card">
|
||||
<h3>钩子集成方式</h3>
|
||||
<table class="int-table">
|
||||
<tr><th>功能</th><th>集成方式</th><th>修改核心</th></tr>
|
||||
<tr><td>工具调用遥测</td><td><span class="hook">post_tool_call</span></td><td class="no-mod">NO</td></tr>
|
||||
<tr><td>Session 评分</td><td><span class="hook">on_session_end</span></td><td class="no-mod">NO</td></tr>
|
||||
<tr><td>策略注入</td><td><span class="hook">pre_llm_call</span></td><td class="no-mod">NO</td></tr>
|
||||
<tr><td>定时任务</td><td>cron/jobs.json</td><td class="no-mod">NO</td></tr>
|
||||
<tr><td>飞书通知</td><td>gateway/ 飞书网关</td><td class="no-mod">NO</td></tr>
|
||||
<tr><td>技能创建</td><td>skill_manager_tool</td><td class="no-mod">NO</td></tr>
|
||||
<tr><td>记忆更新</td><td>memory_tool</td><td class="no-mod">NO</td></tr>
|
||||
<tr><td>历史数据</td><td>state.db 只读</td><td class="no-mod">NO</td></tr>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- ═══════ Database ═══════ -->
|
||||
<h2>独立数据库设计</h2>
|
||||
<p>独立于核心 state.db,7 张表存储于 <code>~/.hermes/self_evolution/evolution.db</code></p>
|
||||
|
||||
<div class="card-grid" style="grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));">
|
||||
<div class="db-table">
|
||||
<div class="db-name">tool_invocations</div>
|
||||
<div class="db-col">session_id <span>TEXT</span></div>
|
||||
<div class="db-col">tool_name <span>TEXT</span></div>
|
||||
<div class="db-col">duration_ms <span>INT</span></div>
|
||||
<div class="db-col">success <span>BOOL</span></div>
|
||||
<div class="db-col">error_type <span>TEXT</span></div>
|
||||
</div>
|
||||
<div class="db-table">
|
||||
<div class="db-name">session_scores</div>
|
||||
<div class="db-col">session_id <span>TEXT PK</span></div>
|
||||
<div class="db-col">composite_score <span>REAL</span></div>
|
||||
<div class="db-col">completion_rate <span>REAL</span></div>
|
||||
<div class="db-col">efficiency_score <span>REAL</span></div>
|
||||
<div class="db-col">task_category <span>TEXT</span></div>
|
||||
</div>
|
||||
<div class="db-table">
|
||||
<div class="db-name">outcome_signals</div>
|
||||
<div class="db-col">session_id <span>TEXT</span></div>
|
||||
<div class="db-col">signal_type <span>TEXT</span></div>
|
||||
<div class="db-col">signal_value <span>REAL</span></div>
|
||||
<div class="db-col">metadata <span>TEXT JSON</span></div>
|
||||
</div>
|
||||
<div class="db-table">
|
||||
<div class="db-name">reflection_reports</div>
|
||||
<div class="db-col">sessions_analyzed <span>INT</span></div>
|
||||
<div class="db-col">avg_score <span>REAL</span></div>
|
||||
<div class="db-col">error_summary <span>TEXT</span></div>
|
||||
<div class="db-col">worst_patterns <span>TEXT JSON</span></div>
|
||||
<div class="db-col">recommendations <span>TEXT JSON</span></div>
|
||||
</div>
|
||||
<div class="db-table">
|
||||
<div class="db-name">evolution_proposals</div>
|
||||
<div class="db-col">id <span>TEXT PK</span></div>
|
||||
<div class="db-col">proposal_type <span>TEXT</span></div>
|
||||
<div class="db-col">title, description <span>TEXT</span></div>
|
||||
<div class="db-col">status <span>TEXT</span> <span style="color:var(--green);">pending→approved→executed</span></div>
|
||||
</div>
|
||||
<div class="db-table">
|
||||
<div class="db-name">improvement_units</div>
|
||||
<div class="db-col">proposal_id <span>TEXT FK</span></div>
|
||||
<div class="db-col">baseline_score <span>REAL</span></div>
|
||||
<div class="db-col">current_score <span>REAL</span></div>
|
||||
<div class="db-col">status <span>TEXT</span> <span style="color:var(--green);">active→promoted</span> / <span style="color:var(--red);">reverted</span></div>
|
||||
</div>
|
||||
<div class="db-table">
|
||||
<div class="db-name">strategy_versions</div>
|
||||
<div class="db-col">version <span>INT</span></div>
|
||||
<div class="db-col">strategies_json <span>TEXT</span></div>
|
||||
<div class="db-col">avg_score <span>REAL</span></div>
|
||||
<div class="db-col">active_from / active_until <span>REAL</span></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- ═══════ Safety ═══════ -->
|
||||
<h2>安全机制:防止退化漂移</h2>
|
||||
<p>六层防护确保进化方向正确且可回滚。</p>
|
||||
|
||||
<div class="safety-grid">
|
||||
<div class="safety-item">
|
||||
<div class="safety-icon">🗄️</div>
|
||||
<div class="safety-title">独立数据库</div>
|
||||
<div class="safety-desc">不碰 state.db,上游 schema 变更无影响</div>
|
||||
</div>
|
||||
<div class="safety-item">
|
||||
<div class="safety-icon">🔒</div>
|
||||
<div class="safety-title">只读核心</div>
|
||||
<div class="safety-desc">所有集成通过钩子完成,不修改核心文件</div>
|
||||
</div>
|
||||
<div class="safety-item">
|
||||
<div class="safety-icon">🚧</div>
|
||||
<div class="safety-title">人工闸门</div>
|
||||
<div class="safety-desc">进化方案必须通过飞书审批,不自动执行</div>
|
||||
</div>
|
||||
<div class="safety-item">
|
||||
<div class="safety-icon">⏪</div>
|
||||
<div class="safety-title">版本回滚</div>
|
||||
<div class="safety-desc">策略变更版本化,评分连续下降自动回滚</div>
|
||||
</div>
|
||||
<div class="safety-item">
|
||||
<div class="safety-icon">🛡️</div>
|
||||
<div class="safety-title">有界变更</div>
|
||||
<div class="safety-desc">只能写 PERFORMANCE.md、创建 learned skills</div>
|
||||
</div>
|
||||
<div class="safety-item">
|
||||
<div class="safety-icon">📚</div>
|
||||
<div class="safety-title">拒绝学习</div>
|
||||
<div class="safety-desc">被拒绝的提案会被分析,避免重复提出</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- ═══════ Implementation Phases ═══════ -->
|
||||
<h2>实施路径</h2>
|
||||
<p>四个阶段,每阶段约 1 周。</p>
|
||||
|
||||
<div class="phases">
|
||||
<div class="phase">
|
||||
<div class="phase-num">01</div>
|
||||
<div class="phase-title">基础设施</div>
|
||||
<ul>
|
||||
<li>插件骨架</li>
|
||||
<li>独立数据库 db.py</li>
|
||||
<li>遥测采集 hooks.py</li>
|
||||
<li>质量评分器</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="phase">
|
||||
<div class="phase-num">02</div>
|
||||
<div class="phase-title">梦境整理</div>
|
||||
<ul>
|
||||
<li>反思引擎 reflection_engine.py</li>
|
||||
<li>错误分析 + 时间浪费分析</li>
|
||||
<li>进化提案生成器</li>
|
||||
<li>凌晨 1:00 cron 注册</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="phase">
|
||||
<div class="phase-num">03</div>
|
||||
<div class="phase-title">飞书审批</div>
|
||||
<ul>
|
||||
<li>飞书通知器 feishu_notifier.py</li>
|
||||
<li>卡片消息 + 按钮回调</li>
|
||||
<li>19:00 cron 注册</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="phase">
|
||||
<div class="phase-num">04</div>
|
||||
<div class="phase-title">进化执行</div>
|
||||
<ul>
|
||||
<li>进化执行器 + 回滚</li>
|
||||
<li>策略注入 + 规则引擎</li>
|
||||
<li>策略存储 + 版本管理</li>
|
||||
<li>A/B 测试追踪</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- ═══════ Model Config ═══════ -->
|
||||
<h2>模型配置</h2>
|
||||
<div class="card">
|
||||
<div class="file-tree">
|
||||
<span class="comment"># ~/.hermes/self_evolution/config.yaml</span>
|
||||
<span class="var">model:</span>
|
||||
<span class="var">primary:</span>
|
||||
<span class="var">provider:</span> <span style="color:var(--green);">"zhipu"</span> <span class="comment"># 优先使用 GLM-5.1</span>
|
||||
<span class="var">model:</span> <span style="color:var(--green);">"glm-5.1"</span>
|
||||
<span class="var">fallback:</span>
|
||||
<span class="var">provider:</span> <span style="color:var(--cyan);">"ollama"</span> <span class="comment"># GLM 不可用时降级到本地 Qwen</span>
|
||||
<span class="var">model:</span> <span style="color:var(--cyan);">"qwen3:32b"</span>
|
||||
<span class="var">base_url:</span> <span style="color:var(--cyan);">"http://localhost:11434"</span>
|
||||
|
||||
<span class="var">schedule:</span>
|
||||
<span class="var">dream_time:</span> <span style="color:var(--amber);">"0 1 * * *"</span> <span class="comment"># 凌晨 1:00</span>
|
||||
<span class="var">propose_time:</span> <span style="color:var(--amber);">"0 19 * * *"</span> <span class="comment"># 当日 19:00</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- ═══════ Footer ═══════ -->
|
||||
<div style="margin-top: 4rem; padding-top: 2rem; border-top: 1px solid var(--border); text-align: center; color: var(--text-dim); font-size: 0.85rem;">
|
||||
<p>Hermes Agent Self-Evolution System — Designed with reference from Claude Code open-source patterns</p>
|
||||
<p style="margin-top: 0.5rem; font-size: 0.78rem;">conversation-analyzer · Ralph Wiggum · learning-output-style · rule_engine</p>
|
||||
</div>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
43
self_evolution/__init__.py
Normal file
43
self_evolution/__init__.py
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
"""
|
||||
Self Evolution Plugin
|
||||
=====================
|
||||
|
||||
Agent self-optimization and continuous evolution system.
|
||||
|
||||
Architecture:
|
||||
- Telemetry: collects tool/session data via hooks
|
||||
- Quality Scorer: evaluates session outcomes
|
||||
- Dream Engine: nightly reflection at 1:00
|
||||
- Evolution Proposer: generates improvement proposals
|
||||
- Feishu Notifier: pushes proposals at 19:00 for user approval
|
||||
- Evolution Executor: applies approved changes with rollback support
|
||||
- Strategy Injector: injects learned hints into sessions
|
||||
|
||||
Design references from Claude Code:
|
||||
- conversation-analyzer (hookify): dream analysis pattern
|
||||
- Ralph Wiggum: iterative evolution with rollback
|
||||
- learning-output-style: session-start strategy injection
|
||||
- rule_engine (hookify): conditional strategy matching
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def register(ctx) -> None:
    """Plugin entry point — called by Hermes PluginManager.

    Registers:
    - 3 hooks: post_tool_call, on_session_end, pre_llm_call

    NOTE(review): the original docstring also claimed 3 slash commands
    (/evolve, /reflect, /evolution_status) are registered here, but no
    command registration is visible in this function — confirm whether
    hooks.register_all handles them.
    """
    # Initialize the plugin's standalone SQLite schema before any hook fires.
    from self_evolution.db import init_db
    init_db()

    # Hooks perform all telemetry collection; they attach to ctx.
    from self_evolution.hooks import register_all as register_hooks
    register_hooks(ctx)

    logger.info("self_evolution plugin loaded: 3 hooks, telemetry active")
|
||||
82
self_evolution/agents/dream_analyzer.md
Normal file
82
self_evolution/agents/dream_analyzer.md
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
---
|
||||
name: dream_analyzer
|
||||
description: >
|
||||
用于每日梦境整理的分析 agent。
|
||||
分析前日所有 session 的工具调用、错误模式、时间浪费,
|
||||
产出结构化的反思报告和进化提案。
|
||||
model: inherit
|
||||
tools: ["Read", "Grep"]
|
||||
---
|
||||
|
||||
你是 Hermes Agent 的性能分析专家。你的任务是分析 agent 的运行数据,识别问题和优化机会。
|
||||
|
||||
## 分析流程
|
||||
|
||||
### 1. 错误信号检测
|
||||
|
||||
参考 Claude Code conversation-analyzer 的模式,搜索以下信号:
|
||||
|
||||
**显式纠正信号:**
|
||||
- 用户消息包含 "不对"、"错误"、"重试"、"不要"
|
||||
- 用户消息包含 "stop"、"wrong"、"retry"、"don't"
|
||||
|
||||
**沮丧反应信号:**
|
||||
- "为什么你做了X?"、"那不是我说的"
|
||||
- "太慢了"、"浪费时间"
|
||||
|
||||
**用户回退信号:**
|
||||
- 用户撤销了 agent 的修改
|
||||
- 用户手动修复了 agent 的问题
|
||||
|
||||
**重复问题:**
|
||||
- 同类错误在多个 session 中出现
|
||||
|
||||
### 2. 错误严重程度分级
|
||||
|
||||
**高严重度(应创建规避规则):**
|
||||
- 系统性工具失败(同一工具多次失败)
|
||||
- 安全相关问题
|
||||
- 数据丢失风险
|
||||
|
||||
**中严重度(应警告):**
|
||||
- 效率问题(重复操作、不必要的步骤)
|
||||
- 风格不一致
|
||||
- 非关键错误
|
||||
|
||||
**低严重度(可选优化):**
|
||||
- 用户偏好
|
||||
- 非关键的模式改进
|
||||
|
||||
### 3. 时间浪费分析
|
||||
|
||||
重点分析:
|
||||
- 耗时最长的工具调用
|
||||
- 重复操作(多次读同一文件、重复搜索)
|
||||
- 工具调用链中的不必要步骤
|
||||
- 迭代轮数过多的 session
|
||||
|
||||
### 4. 输出格式
|
||||
|
||||
必须按 JSON 格式输出:
|
||||
|
||||
```json
|
||||
{
|
||||
"worst_patterns": ["模式描述1", "模式描述2"],
|
||||
"best_patterns": ["成功模式描述1"],
|
||||
"tool_insights": {
|
||||
"tool_name": {"success_rate": 0.95, "avg_duration_ms": 500, "recommendation": "建议"}
|
||||
},
|
||||
"recommendations": [
|
||||
"具体的可操作建议1",
|
||||
"具体的可操作建议2"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### 5. 质量标准
|
||||
|
||||
- 每个建议都必须是具体的、可操作的
|
||||
- 包含实际的例子
|
||||
- 解释为什么这个问题值得修复
|
||||
- 提供可直接使用的规则或策略
|
||||
- 不要对假设性讨论产生误报
|
||||
51
self_evolution/agents/evolution_planner.md
Normal file
51
self_evolution/agents/evolution_planner.md
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
---
|
||||
name: evolution_planner
|
||||
description: >
|
||||
用于将反思报告转化为具体进化方案的规划 agent。
|
||||
生成技能创建、策略调整、记忆更新等具体方案。
|
||||
model: inherit
|
||||
tools: ["Read", "Grep"]
|
||||
---
|
||||
|
||||
你是 Hermes Agent 的进化规划专家。你的任务是将性能分析结论转化为具体的、可执行的进化方案。
|
||||
|
||||
## 方案类型
|
||||
|
||||
### 技能创建 (skill)
|
||||
当发现可复用的成功模式时,建议创建新技能:
|
||||
- 描述技能的触发条件和执行步骤
|
||||
- 包含具体的 prompt 模板
|
||||
- 标注适用的场景
|
||||
|
||||
### 策略调整 (strategy)
|
||||
当发现效率问题或错误模式时,建议创建策略规则:
|
||||
- 定义匹配条件(工具名、平台、任务类型)
|
||||
- 提供策略提示文本
|
||||
- 标注严重程度(hint | avoid | prefer)
|
||||
|
||||
### 记忆更新 (memory)
|
||||
当发现关于用户偏好或环境特性时,建议更新记忆:
|
||||
- 写入 PERFORMANCE.md
|
||||
- 内容简洁、可操作
|
||||
- 避免主观判断
|
||||
|
||||
### 工具偏好 (tool_preference)
|
||||
当发现工具使用效率差异时,建议调整偏好:
|
||||
- 基于数据说明为什么A优于B
|
||||
- 提供具体的替换建议
|
||||
|
||||
## 输出格式
|
||||
|
||||
每个方案必须包含:
|
||||
1. **标题**:简短描述(<50字)
|
||||
2. **描述**:详细说明变更内容
|
||||
3. **预期影响**:定量或定性的改善预期
|
||||
4. **风险评估**:low / medium / high
|
||||
5. **回滚方案**:如何安全地撤销此变更
|
||||
|
||||
## 质量标准
|
||||
|
||||
- 每个方案只变更一个变量
|
||||
- 方案必须是可测量、可回滚的
|
||||
- 优先高影响、低风险的方案
|
||||
- 每次最多提出 5 个方案
|
||||
115
self_evolution/cron_jobs.py
Normal file
115
self_evolution/cron_jobs.py
Normal file
|
|
@ -0,0 +1,115 @@
|
|||
"""
|
||||
Self Evolution Plugin — Cron Job Registration
|
||||
==============================================
|
||||
|
||||
Registers two cron jobs:
|
||||
1. dream_time (1:00): Run dream consolidation
|
||||
2. propose_time (19:00): Push proposals via Feishu
|
||||
|
||||
Uses Hermes' existing cron system (cron/jobs.json).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from self_evolution.paths import CRON_DIR
|
||||
|
||||
CRON_FILE = CRON_DIR / "jobs.json"
|
||||
|
||||
DREAM_JOB_ID = "self_evolution_dream"
|
||||
PROPOSE_JOB_ID = "self_evolution_propose"
|
||||
|
||||
|
||||
def register_cron_jobs():
    """Ensure both self_evolution cron jobs exist in cron/jobs.json.

    Idempotent: a job is appended only when no entry with its id is
    already present.
    """
    CRON_DIR.mkdir(parents=True, exist_ok=True)

    jobs = _load_jobs()

    # Model/provider come from the hermes unified config.
    from self_evolution.reflection_engine import _resolve_runtime_config
    runtime = _resolve_runtime_config()
    model = runtime.get("model", "")
    provider = runtime.get("provider", "")

    existing_ids = {j.get("id") for j in jobs}

    # (job id, display name, prompt, cron schedule, skill)
    job_specs = [
        (
            DREAM_JOB_ID,
            "Self Evolution - Dream Consolidation",
            "运行自我进化的梦境整理:分析前日session的错误和浪费时间问题,生成进化提案。",
            "0 1 * * *",
            "self_evolution:dream",
        ),
        (
            PROPOSE_JOB_ID,
            "Self Evolution - Proposal Push",
            "推送今日自我进化提案到飞书。",
            "0 19 * * *",
            "self_evolution:propose",
        ),
    ]

    for job_id, name, prompt, schedule, skill in job_specs:
        if job_id in existing_ids:
            continue
        jobs.append({
            "id": job_id,
            "name": name,
            "prompt": prompt,
            "schedule": schedule,
            "model": model,
            "provider": provider,
            "deliver": "[SILENT]",
            "skill": skill,
        })

    _save_jobs(jobs)
    logger.info("Registered self_evolution cron jobs: dream=1:00, propose=19:00")
|
||||
|
||||
|
||||
def run_dream_job():
    """Run the nightly dream consolidation (cron entry point, 1:00).

    Model/provider configuration is resolved inside DreamEngine via the
    hermes unified runtime provider.
    """
    from self_evolution.reflection_engine import DreamEngine

    # DreamEngine() with no args auto-resolves via resolve_runtime_provider().
    report = DreamEngine().run(hours=24, max_runtime_seconds=6 * 3600)

    if not report:
        logger.info("Dream consolidation: no data to analyze")
        return

    logger.info("Dream consolidation complete: score=%.3f, proposals generated", report.avg_score)
|
||||
|
||||
|
||||
def run_propose_job():
    """Push today's evolution proposals to Feishu (cron entry point, 19:00)."""
    from self_evolution.feishu_notifier import FeishuNotifier

    FeishuNotifier().send_daily_report()
|
||||
|
||||
|
||||
def _load_jobs() -> list:
    """Load the existing cron job list.

    Returns an empty list when the file is missing, unreadable, or does
    not contain a JSON array — a corrupt or foreign jobs.json must not
    crash the registration path.
    """
    if not CRON_FILE.exists():
        return []
    try:
        data = json.loads(CRON_FILE.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, OSError):
        return []
    # Guard against valid-JSON-but-wrong-shape content (e.g. an object):
    # callers append dicts to the result and iterate it as a list.
    return data if isinstance(data, list) else []
|
||||
|
||||
|
||||
def _save_jobs(jobs: list):
    """Persist the cron job list as pretty-printed UTF-8 JSON."""
    payload = json.dumps(jobs, ensure_ascii=False, indent=2)
    CRON_FILE.write_text(payload, encoding="utf-8")
|
||||
296
self_evolution/db.py
Normal file
296
self_evolution/db.py
Normal file
|
|
@ -0,0 +1,296 @@
|
|||
"""
|
||||
Self Evolution Plugin — Independent SQLite Database
|
||||
=====================================================
|
||||
Independent from state.db to avoid upstream schema conflicts.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import sqlite3
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from self_evolution.paths import DATA_DIR as DB_DIR, DB_PATH
|
||||
|
||||
SCHEMA_VERSION = 1
|
||||
|
||||
VALID_TABLES = frozenset({
|
||||
"tool_invocations", "session_scores", "outcome_signals",
|
||||
"reflection_reports", "evolution_proposals", "improvement_units",
|
||||
"strategy_versions", "_meta",
|
||||
})
|
||||
|
||||
|
||||
def _validate_table(table: str) -> None:
    """Raise ValueError unless *table* belongs to the known schema.

    Guards the f-string SQL builders in this module against injection
    through table names.
    """
    if table in VALID_TABLES:
        return
    raise ValueError(f"Invalid table name: {table!r}")
|
||||
|
||||
|
||||
SCHEMA = """
|
||||
-- Tool invocation telemetry
|
||||
CREATE TABLE IF NOT EXISTS tool_invocations (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
session_id TEXT NOT NULL,
|
||||
tool_name TEXT NOT NULL,
|
||||
duration_ms INTEGER,
|
||||
success BOOLEAN NOT NULL,
|
||||
error_type TEXT,
|
||||
turn_number INTEGER,
|
||||
created_at REAL NOT NULL DEFAULT (strftime('%s','now'))
|
||||
);
|
||||
|
||||
-- Session quality scores
|
||||
CREATE TABLE IF NOT EXISTS session_scores (
|
||||
session_id TEXT PRIMARY KEY,
|
||||
composite_score REAL,
|
||||
completion_rate REAL,
|
||||
efficiency_score REAL,
|
||||
cost_efficiency REAL,
|
||||
satisfaction_proxy REAL,
|
||||
task_category TEXT,
|
||||
model TEXT,
|
||||
created_at REAL NOT NULL DEFAULT (strftime('%s','now'))
|
||||
);
|
||||
|
||||
-- Outcome signals
|
||||
CREATE TABLE IF NOT EXISTS outcome_signals (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
session_id TEXT NOT NULL,
|
||||
signal_type TEXT NOT NULL,
|
||||
signal_value REAL,
|
||||
metadata TEXT,
|
||||
created_at REAL NOT NULL DEFAULT (strftime('%s','now'))
|
||||
);
|
||||
|
||||
-- Reflection reports
|
||||
CREATE TABLE IF NOT EXISTS reflection_reports (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
period_start REAL,
|
||||
period_end REAL,
|
||||
sessions_analyzed INTEGER,
|
||||
avg_score REAL,
|
||||
error_summary TEXT DEFAULT '',
|
||||
waste_summary TEXT DEFAULT '',
|
||||
code_change_summary TEXT DEFAULT '',
|
||||
worst_patterns TEXT DEFAULT '[]',
|
||||
best_patterns TEXT DEFAULT '[]',
|
||||
tool_insights TEXT DEFAULT '{}',
|
||||
recommendations TEXT DEFAULT '[]',
|
||||
model_used TEXT DEFAULT '',
|
||||
created_at REAL NOT NULL DEFAULT (strftime('%s','now'))
|
||||
);
|
||||
|
||||
-- Evolution proposals
|
||||
CREATE TABLE IF NOT EXISTS evolution_proposals (
|
||||
id TEXT PRIMARY KEY,
|
||||
report_id INTEGER REFERENCES reflection_reports(id),
|
||||
proposal_type TEXT NOT NULL,
|
||||
title TEXT NOT NULL,
|
||||
description TEXT NOT NULL,
|
||||
expected_impact TEXT DEFAULT '',
|
||||
risk_assessment TEXT DEFAULT 'low',
|
||||
rollback_plan TEXT DEFAULT '',
|
||||
status TEXT NOT NULL DEFAULT 'pending_approval',
|
||||
user_feedback TEXT DEFAULT '',
|
||||
created_at REAL NOT NULL DEFAULT (strftime('%s','now')),
|
||||
resolved_at REAL
|
||||
);
|
||||
|
||||
-- Improvement unit tracking (A/B testing)
|
||||
CREATE TABLE IF NOT EXISTS improvement_units (
|
||||
id TEXT PRIMARY KEY,
|
||||
proposal_id TEXT REFERENCES evolution_proposals(id),
|
||||
change_type TEXT NOT NULL,
|
||||
version INTEGER DEFAULT 0,
|
||||
baseline_score REAL DEFAULT 0.0,
|
||||
current_score REAL DEFAULT 0.0,
|
||||
sessions_sampled INTEGER DEFAULT 0,
|
||||
min_sessions INTEGER DEFAULT 10,
|
||||
min_improvement REAL DEFAULT 0.05,
|
||||
max_regression REAL DEFAULT 0.10,
|
||||
status TEXT NOT NULL DEFAULT 'active',
|
||||
created_at REAL NOT NULL DEFAULT (strftime('%s','now')),
|
||||
resolved_at REAL
|
||||
);
|
||||
|
||||
-- Strategy version history
|
||||
CREATE TABLE IF NOT EXISTS strategy_versions (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
version INTEGER NOT NULL,
|
||||
strategies_json TEXT NOT NULL,
|
||||
avg_score REAL,
|
||||
active_from REAL NOT NULL,
|
||||
active_until REAL
|
||||
);
|
||||
|
||||
-- Schema version tracking
|
||||
CREATE TABLE IF NOT EXISTS _meta (
|
||||
key TEXT PRIMARY KEY,
|
||||
value TEXT NOT NULL
|
||||
);
|
||||
|
||||
-- Indexes
|
||||
CREATE INDEX IF NOT EXISTS idx_tool_invocations_session ON tool_invocations(session_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_tool_invocations_created ON tool_invocations(created_at);
|
||||
CREATE INDEX IF NOT EXISTS idx_session_scores_created ON session_scores(created_at);
|
||||
CREATE INDEX IF NOT EXISTS idx_outcome_signals_session ON outcome_signals(session_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_evolution_proposals_status ON evolution_proposals(status);
|
||||
CREATE INDEX IF NOT EXISTS idx_improvement_units_status ON improvement_units(status);
|
||||
"""
|
||||
|
||||
|
||||
def _ensure_dir():
    # Create the database directory on first use (idempotent).
    DB_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
_local = threading.local()
|
||||
|
||||
|
||||
def get_connection() -> sqlite3.Connection:
    """Return a thread-local cached connection (reused across calls).

    A cached connection is health-checked with a trivial query first;
    if it has gone bad (e.g. closed by close_connection() or broken by
    a failed statement) it is discarded and a fresh one is opened.
    """
    conn = getattr(_local, "conn", None)
    if conn is not None:
        try:
            conn.execute("SELECT 1")  # cheap liveness probe
            return conn
        except sqlite3.Error:
            # Stale/broken connection: close best-effort, then fall
            # through to reconnect below.
            try:
                conn.close()
            except Exception:
                pass
    _ensure_dir()
    conn = sqlite3.connect(str(DB_PATH))
    conn.row_factory = sqlite3.Row  # rows convert cleanly via dict(row)
    conn.execute("PRAGMA journal_mode=WAL")  # better read concurrency
    conn.execute("PRAGMA foreign_keys=ON")   # enforce REFERENCES clauses
    _local.conn = conn
    return conn
|
||||
|
||||
|
||||
def close_connection():
    """Close and drop the thread-local connection (test cleanup / teardown)."""
    cached = getattr(_local, "conn", None)
    _local.conn = None
    if cached is None:
        return
    try:
        cached.close()
    except Exception:
        # Connection may already be broken; dropping the reference is
        # all that matters here.
        pass
|
||||
|
||||
|
||||
def init_db():
    """Initialize the database with the schema.

    Safe to call repeatedly: SCHEMA uses IF NOT EXISTS throughout and
    the migration below is a no-op once the column exists.
    """
    conn = get_connection()
    conn.executescript(SCHEMA)
    # Record the schema version for future migrations.
    conn.execute(
        "INSERT OR REPLACE INTO _meta (key, value) VALUES (?, ?)",
        ("schema_version", str(SCHEMA_VERSION)),
    )
    conn.commit()
    logger.info("self_evolution database initialized at %s", DB_PATH)

    # Schema migration: add code_change_summary column if missing
    # (databases created before SCHEMA gained this column).
    try:
        conn.execute("ALTER TABLE reflection_reports ADD COLUMN code_change_summary TEXT DEFAULT ''")
        logger.info("Added code_change_summary column to reflection_reports")
    except sqlite3.OperationalError:
        pass  # Column already exists

    # Close after init so subsequent calls get a fresh connection with the new schema
    close_connection()
|
||||
|
||||
|
||||
# ── Generic CRUD ─────────────────────────────────────────────────────────
|
||||
|
||||
def insert(table: str, data: dict) -> int:
    """Insert one row into *table* and return its rowid."""
    _validate_table(table)
    conn = get_connection()
    columns = list(data.keys())
    sql = "INSERT INTO {} ({}) VALUES ({})".format(
        table, ", ".join(columns), ", ".join(["?"] * len(columns))
    )
    cursor = conn.execute(sql, [data[c] for c in columns])
    conn.commit()
    return cursor.lastrowid
|
||||
|
||||
|
||||
def insert_many(table: str, rows: List[dict]):
    """Insert multiple rows; column set is taken from the first row.

    Rows missing a key insert NULL for that column.
    """
    _validate_table(table)
    if not rows:
        return
    conn = get_connection()
    cols = list(rows[0].keys())
    qmarks = ", ".join(["?"] * len(cols))
    sql = f"INSERT INTO {table} ({', '.join(cols)}) VALUES ({qmarks})"
    params = [[row.get(col) for col in cols] for row in rows]
    conn.executemany(sql, params)
    conn.commit()
|
||||
|
||||
|
||||
def update(table: str, data: dict, where: str, where_params: tuple = ()):
    """Update rows of *table* matching the *where* clause."""
    _validate_table(table)
    conn = get_connection()
    assignments = ", ".join(f"{column} = ?" for column in data)
    conn.execute(
        f"UPDATE {table} SET {assignments} WHERE {where}",
        [*data.values(), *where_params],
    )
    conn.commit()
|
||||
|
||||
|
||||
def fetch_one(table: str, where: str = "", params: tuple = ()) -> Optional[Dict[str, Any]]:
    """Return the first matching row as a dict, or None."""
    _validate_table(table)
    conn = get_connection()
    clause = f" WHERE {where}" if where else ""
    row = conn.execute(f"SELECT * FROM {table}{clause} LIMIT 1", params).fetchone()
    return dict(row) if row else None
|
||||
|
||||
|
||||
def fetch_all(table: str, where: str = "", params: tuple = (),
              order_by: str = "", limit: int = 0) -> List[Dict[str, Any]]:
    """Return all matching rows as a list of dicts.

    Optional clauses are appended only when provided; *limit* is coerced
    to int before interpolation.
    """
    _validate_table(table)
    conn = get_connection()
    parts = [f"SELECT * FROM {table}"]
    if where:
        parts.append(f"WHERE {where}")
    if order_by:
        parts.append(f"ORDER BY {order_by}")
    if limit:
        parts.append(f"LIMIT {int(limit)}")
    rows = conn.execute(" ".join(parts), params).fetchall()
    return [dict(row) for row in rows]
|
||||
|
||||
|
||||
def query(sql: str, params: tuple = ()) -> List[Dict[str, Any]]:
    """Run a raw SQL query and return rows as dicts."""
    rows = get_connection().execute(sql, params).fetchall()
    return [dict(row) for row in rows]
|
||||
|
||||
|
||||
def execute(sql: str, params: tuple = ()):
    """Execute a raw SQL statement and commit immediately."""
    connection = get_connection()
    connection.execute(sql, params)
    connection.commit()
|
||||
|
||||
|
||||
def cleanup(days: int = 30):
    """Drop telemetry rows older than *days* days.

    Only the high-volume tables are pruned; reports and proposals are
    kept indefinitely.
    """
    cutoff = time.time() - days * 86400
    conn = get_connection()
    for table in ("tool_invocations", "outcome_signals"):
        conn.execute(f"DELETE FROM {table} WHERE created_at < ?", (cutoff,))
    conn.commit()
    logger.info("Cleaned up data older than %d days", days)
|
||||
325
self_evolution/evolution_executor.py
Normal file
325
self_evolution/evolution_executor.py
Normal file
|
|
@ -0,0 +1,325 @@
|
|||
"""
|
||||
Self Evolution Plugin — Evolution Executor
|
||||
============================================
|
||||
|
||||
Executes approved evolution proposals with rollback support.
|
||||
|
||||
Design reference: Claude Code plugins/ralph-wiggum/
|
||||
- Self-referential feedback loop: execute → verify → rollback if needed
|
||||
- Each change has a "completion promise" (verification criteria)
|
||||
- Iteration > Perfection
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from self_evolution import db
|
||||
from self_evolution.models import Proposal, ImprovementUnit
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from self_evolution.paths import DATA_DIR as STRATEGIES_DIR, STRATEGIES_FILE, ARCHIVE_DIR
|
||||
from self_evolution.paths import SKILLS_DIR, MEMORIES_DIR
|
||||
|
||||
|
||||
class EvolutionExecutor:
|
||||
"""Execute approved evolution proposals.
|
||||
|
||||
Supported proposal types:
|
||||
- skill: create a new skill via skill_manager_tool
|
||||
- strategy: update strategy rules
|
||||
- memory: update PERFORMANCE.md via memory_tool
|
||||
- tool_preference: update tool preference config
|
||||
"""
|
||||
|
||||
def execute(self, proposal: Proposal):
|
||||
"""Execute an approved proposal."""
|
||||
logger.info("Executing proposal: %s (%s)", proposal.id, proposal.proposal_type)
|
||||
|
||||
try:
|
||||
match proposal.proposal_type:
|
||||
case "skill":
|
||||
self._create_skill(proposal)
|
||||
case "strategy":
|
||||
self._update_strategy(proposal)
|
||||
case "memory":
|
||||
self._update_memory(proposal)
|
||||
case "tool_preference":
|
||||
self._update_tool_preference(proposal)
|
||||
case "code_improvement":
|
||||
self._save_optimization_request(proposal)
|
||||
|
||||
# Mark as executed
|
||||
db.update(
|
||||
"evolution_proposals",
|
||||
{"status": "executed", "resolved_at": time.time()},
|
||||
where="id = ?",
|
||||
where_params=(proposal.id,),
|
||||
)
|
||||
|
||||
# Create improvement tracking unit
|
||||
self._create_tracking_unit(proposal)
|
||||
|
||||
logger.info("Proposal %s executed successfully", proposal.id)
|
||||
|
||||
except Exception as exc:
|
||||
logger.exception("Failed to execute proposal %s: %s", proposal.id, exc)
|
||||
db.update(
|
||||
"evolution_proposals",
|
||||
{"status": "execution_failed", "resolved_at": time.time()},
|
||||
where="id = ?",
|
||||
where_params=(proposal.id,),
|
||||
)
|
||||
|
||||
def check_and_rollback(self):
|
||||
"""Check active improvement units and rollback if needed.
|
||||
|
||||
Called during dream consolidation to verify previous changes.
|
||||
"""
|
||||
units = db.fetch_all("improvement_units", where="status = 'active'")
|
||||
|
||||
for unit_data in units:
|
||||
unit = ImprovementUnit(
|
||||
id=unit_data["id"],
|
||||
proposal_id=unit_data["proposal_id"],
|
||||
change_type=unit_data["change_type"],
|
||||
version=unit_data.get("version", 0),
|
||||
baseline_score=unit_data.get("baseline_score", 0),
|
||||
current_score=unit_data.get("current_score", 0),
|
||||
sessions_sampled=unit_data.get("sessions_sampled", 0),
|
||||
min_sessions=unit_data.get("min_sessions", 10),
|
||||
min_improvement=unit_data.get("min_improvement", 0.05),
|
||||
max_regression=unit_data.get("max_regression", 0.10),
|
||||
)
|
||||
|
||||
# Update current score from recent sessions
|
||||
self._update_unit_score(unit)
|
||||
|
||||
if unit.should_revert:
|
||||
self._revert(unit)
|
||||
logger.warning("Rolled back improvement unit %s", unit.id)
|
||||
elif unit.should_promote:
|
||||
self._promote(unit)
|
||||
logger.info("Promoted improvement unit %s", unit.id)
|
||||
|
||||
# ── Proposal Type Handlers ────────────────────────────────────────────
|
||||
|
||||
def _create_skill(self, proposal: Proposal):
|
||||
"""Create a new skill via the skill_manager_tool."""
|
||||
from self_evolution.strategy_store import StrategyStore
|
||||
|
||||
store = StrategyStore()
|
||||
skill_dir = SKILLS_DIR / proposal.id
|
||||
skill_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
skill_content = (
|
||||
f"---\n"
|
||||
f"name: {proposal.id}\n"
|
||||
f"description: {proposal.title}\n"
|
||||
f"---\n\n"
|
||||
f"{proposal.description}\n"
|
||||
)
|
||||
(skill_dir / "SKILL.md").write_text(skill_content, encoding="utf-8")
|
||||
logger.info("Created learned skill: %s", skill_dir)
|
||||
|
||||
def _update_strategy(self, proposal: Proposal):
|
||||
"""Update strategy rules file with version tracking."""
|
||||
from self_evolution.strategy_store import StrategyStore
|
||||
|
||||
store = StrategyStore()
|
||||
current = store.load()
|
||||
|
||||
# Check for duplicate strategies by title similarity
|
||||
rules = current.get("rules", [])
|
||||
existing_titles = {r.get("name", "").strip().lower() for r in rules}
|
||||
if proposal.title.strip().lower() in existing_titles:
|
||||
logger.warning("Skipping duplicate strategy: %s", proposal.title)
|
||||
return
|
||||
|
||||
# Archive current version
|
||||
version = current.get("version", 0) + 1
|
||||
store.archive(version - 1)
|
||||
|
||||
# Parse new strategy from proposal description
|
||||
new_strategy = {
|
||||
"id": proposal.id,
|
||||
"name": proposal.title,
|
||||
"type": "learned",
|
||||
"description": proposal.description,
|
||||
"hint_text": proposal.description,
|
||||
"conditions": [],
|
||||
"severity": "medium",
|
||||
"created_at": time.time(),
|
||||
}
|
||||
|
||||
# Add to strategies
|
||||
rules.append(new_strategy)
|
||||
current["rules"] = rules
|
||||
current["version"] = version
|
||||
|
||||
store.save(current)
|
||||
logger.info("Updated strategies to version %d", version)
|
||||
|
||||
# Invalidate injector cache so new strategy takes effect immediately
|
||||
from self_evolution.strategy_injector import invalidate_cache
|
||||
invalidate_cache()
|
||||
|
||||
def _update_memory(self, proposal: Proposal):
|
||||
"""Update PERFORMANCE.md via the memory system."""
|
||||
perf_path = MEMORIES_DIR / "PERFORMANCE.md"
|
||||
perf_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
existing = ""
|
||||
if perf_path.exists():
|
||||
existing = perf_path.read_text(encoding="utf-8")
|
||||
|
||||
# Append new entry
|
||||
timestamp = time.strftime("%Y-%m-%d %H:%M", time.localtime())
|
||||
entry = f"\n## [{timestamp}] 自动学习\n{proposal.description}\n"
|
||||
|
||||
# Keep file under reasonable size (last 50 entries)
|
||||
entries = (existing + entry).split("\n## ")
|
||||
if len(entries) > 50:
|
||||
entries = entries[-50:]
|
||||
|
||||
perf_path.write_text("\n## ".join(entries), encoding="utf-8")
|
||||
logger.info("Updated PERFORMANCE.md")
|
||||
|
||||
def _update_tool_preference(self, proposal: Proposal):
|
||||
"""Update tool preference config."""
|
||||
prefs_path = STRATEGIES_DIR / "tool_preferences.json"
|
||||
prefs = {}
|
||||
if prefs_path.exists():
|
||||
prefs = json.loads(prefs_path.read_text(encoding="utf-8"))
|
||||
|
||||
prefs[proposal.id] = {
|
||||
"description": proposal.description,
|
||||
"expected_impact": proposal.expected_impact,
|
||||
"created_at": time.time(),
|
||||
}
|
||||
|
||||
prefs_path.write_text(
|
||||
json.dumps(prefs, ensure_ascii=False, indent=2),
|
||||
encoding="utf-8",
|
||||
)
|
||||
logger.info("Updated tool preferences: %s", proposal.id)
|
||||
|
||||
# ── Tracking & Verification ───────────────────────────────────────────
|
||||
|
||||
def _create_tracking_unit(self, proposal: Proposal):
|
||||
"""Create an improvement tracking unit after execution.
|
||||
|
||||
Inspired by Ralph Wiggum's completion_promise pattern.
|
||||
"""
|
||||
# Get baseline score from recent sessions
|
||||
recent = db.fetch_all(
|
||||
"session_scores",
|
||||
order_by="created_at DESC",
|
||||
limit=10,
|
||||
)
|
||||
baseline = (
|
||||
sum(s.get("composite_score", 0) for s in recent) / len(recent)
|
||||
if recent else 0
|
||||
)
|
||||
|
||||
unit = ImprovementUnit(
|
||||
id=f"unit-{uuid.uuid4().hex[:8]}",
|
||||
proposal_id=proposal.id,
|
||||
change_type=proposal.proposal_type,
|
||||
baseline_score=baseline,
|
||||
min_sessions=10,
|
||||
min_improvement=0.05,
|
||||
max_regression=0.10,
|
||||
)
|
||||
|
||||
db.insert("improvement_units", unit.to_db_row())
|
||||
logger.info("Created tracking unit: %s (baseline=%.3f)", unit.id, baseline)
|
||||
|
||||
def _update_unit_score(self, unit: ImprovementUnit):
|
||||
"""Update the current score for an improvement unit."""
|
||||
# Count sessions since this unit was created
|
||||
unit_data = db.fetch_one("improvement_units", where="id = ?", params=(unit.id,))
|
||||
if not unit_data:
|
||||
return
|
||||
|
||||
created_at = unit_data.get("created_at", 0)
|
||||
recent = db.fetch_all(
|
||||
"session_scores",
|
||||
where="created_at >= ?",
|
||||
params=(created_at,),
|
||||
order_by="created_at DESC",
|
||||
)
|
||||
|
||||
if recent:
|
||||
current_score = sum(s.get("composite_score", 0) for s in recent) / len(recent)
|
||||
sessions_sampled = len(recent)
|
||||
|
||||
db.update(
|
||||
"improvement_units",
|
||||
{
|
||||
"current_score": current_score,
|
||||
"sessions_sampled": sessions_sampled,
|
||||
},
|
||||
where="id = ?",
|
||||
where_params=(unit.id,),
|
||||
)
|
||||
unit.current_score = current_score
|
||||
unit.sessions_sampled = sessions_sampled
|
||||
|
||||
def _revert(self, unit: ImprovementUnit):
|
||||
"""Revert a change by restoring the previous version."""
|
||||
from self_evolution.strategy_store import StrategyStore
|
||||
|
||||
store = StrategyStore()
|
||||
if unit.version > 0:
|
||||
old = store.load_archive(unit.version - 1)
|
||||
if old:
|
||||
store.save(old)
|
||||
|
||||
db.update(
|
||||
"improvement_units",
|
||||
{"status": "reverted", "resolved_at": time.time()},
|
||||
where="id = ?",
|
||||
where_params=(unit.id,),
|
||||
)
|
||||
|
||||
def _promote(self, unit: ImprovementUnit):
|
||||
"""Promote an improvement unit from active to permanent."""
|
||||
db.update(
|
||||
"improvement_units",
|
||||
{"status": "promoted", "resolved_at": time.time()},
|
||||
where="id = ?",
|
||||
where_params=(unit.id,),
|
||||
)
|
||||
|
||||
# ── Code Improvement (save request document) ────────────────────────────
|
||||
|
||||
def _save_optimization_request(self, proposal: Proposal):
|
||||
"""Save a code improvement request as a document.
|
||||
|
||||
Does NOT auto-modify code. The user reviews the request and decides
|
||||
whether to implement changes manually or via Claude Code.
|
||||
"""
|
||||
req_dir = DATA_DIR / "optimization_requests"
|
||||
req_dir.mkdir(parents=True, exist_ok=True)
|
||||
doc_path = req_dir / f"{proposal.id}.md"
|
||||
|
||||
doc_content = (
|
||||
f"# 程序优化需求\n\n"
|
||||
f"**标题**: {proposal.title}\n"
|
||||
f"**预期影响**: {proposal.expected_impact}\n"
|
||||
f"**风险评估**: {proposal.risk_assessment}\n"
|
||||
f"**回滚方案**: {proposal.rollback_plan}\n"
|
||||
f"**创建时间**: {time.strftime('%Y-%m-%d %H:%M', time.localtime())}\n\n"
|
||||
f"---\n\n"
|
||||
f"{proposal.description}\n"
|
||||
)
|
||||
|
||||
doc_path.write_text(doc_content, encoding="utf-8")
|
||||
logger.info("Saved optimization request: %s", doc_path)
|
||||
229
self_evolution/evolution_proposer.py
Normal file
229
self_evolution/evolution_proposer.py
Normal file
|
|
@ -0,0 +1,229 @@
|
|||
"""
|
||||
Self Evolution Plugin — Evolution Proposer
|
||||
===========================================
|
||||
|
||||
Converts reflection insights into concrete, actionable evolution proposals.
|
||||
|
||||
Each proposal includes:
|
||||
- type: skill | strategy | memory | tool_preference
|
||||
- title: short description
|
||||
- description: detailed change
|
||||
- expected_impact: what improvement to expect
|
||||
- risk_assessment: low | medium | high
|
||||
- rollback_plan: how to revert
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import uuid
|
||||
from typing import List
|
||||
|
||||
from self_evolution.models import Proposal, ReflectionReport
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def generate_proposals(report: ReflectionReport, report_id: int) -> List[Proposal]:
    """Generate evolution proposals from a reflection report.

    Prioritizes proposals by:
    1. Impact (fixes for systemic errors > optimizations > enhancements)
    2. Risk (low risk first)
    3. Feasibility (clear rollback plan)
    """
    proposals = []

    # 1. Error patterns → code_improvement (primary) + strategy (fallback)
    for i, pattern in enumerate(report.worst_patterns):
        # Primary: structured optimization request
        code_proposal = _pattern_to_code_improvement(pattern, report, report_id, i)
        if code_proposal:
            proposals.append(code_proposal)

    # 2. Best patterns → skill (only if ≥5 successful sessions)
    # NOTE(review): the ≥5-sessions gate presumably lives inside
    # _success_to_proposal (it may return a falsy value) — confirm there.
    for i, pattern in enumerate(report.best_patterns):
        proposal = _success_to_proposal(pattern, report, report_id, i)
        if proposal:
            proposals.append(proposal)

    # 3. Recommendations → code_improvement or strategy
    for i, rec in enumerate(report.recommendations):
        proposal = _recommendation_to_proposal(rec, report, report_id, i)
        if proposal:
            proposals.append(proposal)

    # Deduplicate by title similarity
    proposals = _deduplicate(proposals)

    # Cap at 5 proposals per day
    return proposals[:5]
|
||||
|
||||
|
||||
def _pattern_to_code_improvement(
    pattern: str, report: ReflectionReport, report_id: int, index: int
) -> Proposal:
    """Convert an error pattern into a structured code optimization request.

    Args:
        pattern: One entry from ``report.worst_patterns``.
        report: Reflection report supplying supporting statistics.
        report_id: Database id of the report, stored on the proposal.
        index: Position of the pattern in the report (currently unused;
            kept for signature parity with the other converters).

    Returns:
        A low-risk ``code_improvement`` proposal in ``pending_approval``
        state. Always returns a Proposal, even though the caller guards
        with a truthiness check.
    """
    # Extract key info from error analysis; all report fields are optional,
    # so fall back to empty/zero values.
    error_detail = report.error_summary or ""
    sessions = report.sessions_analyzed or 0
    score = report.avg_score or 0

    # Build structured optimization document (Markdown, rendered downstream).
    short_pattern = pattern[:60]
    description = (
        f"## 问题描述\n"
        f"{short_pattern}\n\n"
        f"## 数据支撑\n"
        f"- 分析会话数: {sessions}\n"
        f"- 平均质量分: {score:.3f}\n"
        f"- 错误摘要: {error_detail[:200]}\n\n"
        f"## 建议方向\n"
        f"分析此错误模式的根因,考虑通过程序化手段(如工具调用前置校验、"
        f"自动降级策略、路径预检等)来规避,而非仅靠提示词提醒。\n\n"
        f"## 备注\n"
        f"此为程序优化需求,审批后将保存为需求文档,需手动实施代码修改。"
    )

    return Proposal(
        id=f"prop-opt-{uuid.uuid4().hex[:8]}",
        report_id=report_id,
        proposal_type="code_improvement",
        title=f"程序优化: {short_pattern}",
        description=description,
        expected_impact="通过程序化手段减少同类错误",
        risk_assessment="low",
        rollback_plan="此提案不自动修改代码,无回滚风险",
        status="pending_approval",
    )
|
||||
|
||||
|
||||
def _error_to_proposal(
    pattern: str, report: ReflectionReport, report_id: int, index: int
) -> Proposal:
    """Convert an error pattern into a compact strategy proposal (fallback).

    Fallback alternative to ``_pattern_to_code_improvement``: instead of a
    structured optimization request, produces a lightweight ``strategy``
    proposal suggesting a rule to avoid the pattern.

    Args:
        pattern: One entry from ``report.worst_patterns``.
        report: Source reflection report (unused beyond provenance).
        report_id: Database id of the report, stored on the proposal.
        index: Position of the pattern in the report (currently unused).

    Returns:
        A low-risk ``strategy`` proposal in ``pending_approval`` state.
    """
    # Fix: the previous revision computed ``hint = _compress_hint(pattern)``
    # here but never used the result — the dead call has been removed.
    return Proposal(
        id=f"prop-error-{uuid.uuid4().hex[:8]}",
        report_id=report_id,
        proposal_type="strategy",
        title=f"规避模式: {pattern[:50]}",
        description=f"基于错误分析发现的问题模式: {pattern}\n\n"
                    f"建议创建策略规则来规避此类问题。",
        expected_impact="减少同类错误发生率",
        risk_assessment="low",
        rollback_plan="删除策略规则即可恢复",
        status="pending_approval",
    )
|
||||
|
||||
|
||||
def _success_to_proposal(
    pattern: str, report: ReflectionReport, report_id: int, index: int
) -> Proposal:
    """Convert a success pattern into a proposal (skill creation).

    Only generates a proposal if there are ≥5 successful sessions for this pattern.

    Args:
        pattern: One entry from ``report.best_patterns``.
        report: Source reflection report.
        report_id: Database id of the report, stored on the proposal.
        index: Position of the pattern in the report (currently unused).

    Returns:
        A ``skill`` proposal in ``pending_approval`` state, or None when the
        success count is below the threshold. (The annotation says Proposal,
        but None is possible — callers already guard with a truthiness check.)
    """
    success_count = _count_successful_sessions(pattern, report)
    if success_count < 5:
        # Not enough supporting evidence — skip rather than propose a skill
        # based on a one-off success.
        logger.info(
            "Skipping skill proposal: only %d successes (need 5) for: %s",
            success_count, pattern[:40],
        )
        return None

    return Proposal(
        id=f"prop-success-{uuid.uuid4().hex[:8]}",
        report_id=report_id,
        proposal_type="skill",
        title=f"固化成功模式: {pattern[:50]}",
        description=f"基于成功分析发现的高效模式: {pattern}\n\n"
                    f"已验证 {success_count} 次成功执行。\n\n"
                    f"建议创建可复用的技能来固化此模式。",
        expected_impact="提高同类任务效率",
        risk_assessment="low",
        rollback_plan="删除创建的技能即可恢复",
        status="pending_approval",
    )
|
||||
|
||||
|
||||
def _recommendation_to_proposal(
    rec: str, report: ReflectionReport, report_id: int, index: int
) -> Proposal:
    """Convert a free-text recommendation into a proposal.

    The proposal type is inferred from keywords in the text and defaults to
    "strategy". ``report`` and ``index`` are accepted for signature parity
    with the other converters but are currently unused.
    """
    # Detect type from content — the first matching keyword group wins, so
    # the ordering (memory > skill > tool_preference) is significant.
    proposal_type = "strategy"
    if any(kw in rec for kw in ["记忆", "记忆更新", "memory", "记住"]):
        proposal_type = "memory"
    elif any(kw in rec for kw in ["技能", "skill", "创建"]):
        proposal_type = "skill"
    elif any(kw in rec for kw in ["工具", "tool", "偏好"]):
        proposal_type = "tool_preference"

    return Proposal(
        id=f"prop-rec-{uuid.uuid4().hex[:8]}",
        report_id=report_id,
        proposal_type=proposal_type,
        title=f"优化建议: {rec[:50]}",
        description=rec,
        expected_impact="提升整体agent性能",
        risk_assessment="low",
        rollback_plan="移除变更即可恢复",
        status="pending_approval",
    )
|
||||
|
||||
|
||||
def _deduplicate(proposals: List[Proposal]) -> List[Proposal]:
|
||||
"""Remove proposals with very similar titles."""
|
||||
seen_titles = set()
|
||||
unique = []
|
||||
for p in proposals:
|
||||
# Normalize title for comparison
|
||||
normalized = p.title.lower().strip()[:30]
|
||||
if normalized not in seen_titles:
|
||||
seen_titles.add(normalized)
|
||||
unique.append(p)
|
||||
return unique
|
||||
|
||||
|
||||
def _count_successful_sessions(pattern: str, report: ReflectionReport) -> int:
    """Estimate how many recent sessions succeeded.

    Counts up to 100 recent ``session_scores`` rows with
    ``composite_score >= 0.7``.

    NOTE(review): despite the parameter, ``pattern`` is not used to filter
    the query — every high-scoring session is counted, so this is a global
    success count rather than a per-pattern one. Confirm whether filtering
    by task_category keywords extracted from the pattern was intended.

    Falls back to ``report.sessions_analyzed`` when the DB is unavailable.
    """
    try:
        from self_evolution import db

        # Global query: no per-pattern filtering is applied (see note above).
        scores = db.fetch_all(
            "session_scores",
            where="composite_score >= ?",
            params=(0.7,),
            order_by="created_at DESC",
            limit=100,
        )
        return len(scores)
    except Exception:
        # Fallback: use sessions_analyzed from report as estimate
        return report.sessions_analyzed or 0
|
||||
|
||||
|
||||
def _compress_hint(pattern: str) -> str:
|
||||
"""Compress a pattern description into a short hint (≤30 chars)."""
|
||||
# Keyword-based compression
|
||||
mappings = [
|
||||
(["bash", "路径", "path", "预检"], "bash前先read验证路径"),
|
||||
(["api", "调试", "降级"], "API失败时降级只读探查"),
|
||||
(["browser", "超时", "timeout"], "浏览器操作设超时保护"),
|
||||
(["重试", "retry", "重复"], "避免重复重试相同操作"),
|
||||
(["工具", "tool", "失败"], "工具失败时切换备选方案"),
|
||||
]
|
||||
text = pattern.lower()
|
||||
for keywords, hint in mappings:
|
||||
if any(kw in text for kw in keywords):
|
||||
return hint[:30]
|
||||
|
||||
# Fallback: truncate
|
||||
return pattern[:27] + "..." if len(pattern) > 30 else pattern
|
||||
490
self_evolution/feishu_notifier.py
Normal file
490
self_evolution/feishu_notifier.py
Normal file
|
|
@ -0,0 +1,490 @@
|
|||
"""
|
||||
Self Evolution Plugin — Feishu Notifier
|
||||
========================================
|
||||
|
||||
Pushes evolution proposals to Feishu at 19:00 daily.
|
||||
Uses interactive card messages with action buttons for approval.
|
||||
|
||||
Receives callbacks when user clicks: approve / modify / reject.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from self_evolution import db
|
||||
from self_evolution.models import Proposal
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FeishuNotifier:
    """Send evolution proposals via Feishu interactive cards.

    Credentials are read from the ``FEISHU_APP_ID`` / ``FEISHU_APP_SECRET``
    environment variables; when either is missing the notifier is disabled
    and the public methods become no-ops.
    """

    def __init__(self):
        self.app_id = os.getenv("FEISHU_APP_ID", "")
        self.app_secret = os.getenv("FEISHU_APP_SECRET", "")
        # Notifications are silently skipped unless both credentials are set.
        self.enabled = bool(self.app_id and self.app_secret)
        self._client = None  # cached lark_oapi client, built lazily
        self._token_cache: Optional[tuple[str, float]] = None  # (token, expire_at)

    def send_daily_report(self):
        """Send pending proposals as a daily Feishu card message.

        Called by the 19:00 cron job. No-op when Feishu is not configured
        or there is nothing pending.
        """
        if not self.enabled:
            logger.info("Feishu not configured, skipping notification")
            return

        # Load pending proposals
        proposals = db.fetch_all(
            "evolution_proposals",
            where="status = ?",
            params=("pending_approval",),
            order_by="created_at DESC",
        )

        if not proposals:
            logger.info("No pending proposals to send")
            return

        # Load latest reflection report for context
        reports = db.fetch_all(
            "reflection_reports",
            order_by="created_at DESC",
            limit=1,
        )
        report = reports[0] if reports else {}

        # Build and send the interactive card
        card = self._build_card(proposals, report)
        self._send_card(card)
        logger.info("Sent %d proposals via Feishu", len(proposals))

    def handle_callback(self, action: str, proposal_id: str, user_input: str = ""):
        """Handle Feishu card button callback.

        Args:
            action: "approve" | "modify" | "reject"
            proposal_id: The proposal ID
            user_input: Optional user modification text

        Returns:
            dict with 'feedback' (str) and 'updated_card' (dict or None).
            Unknown actions yield empty feedback but still refresh the card.
        """
        result = {"feedback": "", "updated_card": None}

        if action == "approve":
            logger.info("[TRACE] handle_callback: approving proposal %s", proposal_id)
            title = self._approve(proposal_id)
            result["feedback"] = f"✅ 已通过并执行: {title}"
            logger.info("[TRACE] handle_callback: approved '%s'", title)
        elif action == "modify":
            title = self._modify(proposal_id, user_input)
            result["feedback"] = f"✏️ 已修改: {title}"
        elif action == "reject":
            title = self._reject(proposal_id, user_input)
            result["feedback"] = f"❌ 已拒绝: {title}"

        # Build updated card with remaining pending proposals
        logger.info("[TRACE] handle_callback: building updated card")
        result["updated_card"] = self.build_updated_card()
        logger.info("[TRACE] handle_callback: updated_card=%s", "present" if result["updated_card"] else "None (all done)")
        return result

    def build_updated_card(self) -> Optional[dict]:
        """Build a card with remaining pending proposals.

        Returns None if no pending proposals remain (caller can show
        a 'all done' card instead).
        """
        pending = db.fetch_all(
            "evolution_proposals",
            where="status = ?",
            params=("pending_approval",),
            order_by="created_at DESC",
        )

        if not pending:
            return None

        # Load latest report for context.
        # NOTE(review): ``report`` is fetched but not rendered in this card —
        # kept for parity with send_daily_report; confirm if intentional.
        reports = db.fetch_all("reflection_reports", order_by="created_at DESC", limit=1)
        report = reports[0] if reports else {}

        date_str = time.strftime("%Y-%m-%d", time.localtime())
        elements = []

        # Status bar
        elements.append({
            "tag": "div",
            "text": {"tag": "lark_md", "content": f"**待审批**: {len(pending)} 个提案"},
        })
        elements.append({"tag": "hr"})

        # Proposals — rendering shared with _build_card
        elements.extend(self._proposal_elements(pending))

        return {
            "header": {
                "title": {"tag": "plain_text", "content": f"Hermes 进化报告 ({date_str})"},
                "template": "blue",
            },
            "elements": elements,
        }

    def send_rollback_notification(self, unit_id: str, reason: str):
        """Notify user that an improvement unit was auto-rolled back."""
        if not self.enabled:
            return
        card = {
            "elements": [
                {
                    "tag": "div",
                    "text": {
                        "tag": "lark_md",
                        "content": f"**自动回滚通知**\n\n"
                                   f"改进单元 `{unit_id}` 已自动回滚。\n"
                                   f"原因: {reason}",
                    },
                },
            ],
        }
        self._send_card(card)

    # ── Internal Methods ──────────────────────────────────────────────────

    @staticmethod
    def _proposal_elements(proposals: List[dict]) -> List[dict]:
        """Render one lark_md block plus approve/modify/reject buttons per proposal.

        Extracted from _build_card / build_updated_card, which previously
        duplicated this loop verbatim; sharing it keeps both cards identical.
        """
        type_emoji = {"skill": "🛠️", "strategy": "⚡", "memory": "🧠", "tool_preference": "🔧", "code_improvement": "🏗️"}
        elements: List[dict] = []
        for i, p in enumerate(proposals):
            emoji = type_emoji.get(p.get("proposal_type", ""), "📋")

            proposal_text = (
                f"**[{emoji}] {p.get('title', f'提案 {i+1}')}**\n"
                f"{p.get('description', '')[:200]}\n"
                f"预期影响: {p.get('expected_impact', 'N/A')} | "
                f"风险: {p.get('risk_assessment', 'low')}\n"
            )
            elements.append({
                "tag": "div",
                "text": {"tag": "lark_md", "content": proposal_text},
            })

            # Action buttons — callbacks route through handle_callback
            elements.append({
                "tag": "action",
                "actions": [
                    {
                        "tag": "button",
                        "text": {"tag": "plain_text", "content": "通过"},
                        "type": "primary",
                        "value": {"action": "approve", "proposal_id": p["id"]},
                    },
                    {
                        "tag": "button",
                        "text": {"tag": "plain_text", "content": "修改"},
                        "type": "default",
                        "value": {"action": "modify", "proposal_id": p["id"]},
                    },
                    {
                        "tag": "button",
                        "text": {"tag": "plain_text", "content": "拒绝"},
                        "type": "danger",
                        "value": {"action": "reject", "proposal_id": p["id"]},
                    },
                ],
            })
        return elements

    def _approve(self, proposal_id: str) -> str:
        """Mark proposal as approved and trigger execution. Returns title."""
        row = db.fetch_one("evolution_proposals", where="id = ?", params=(proposal_id,))
        title = row.get("title", proposal_id) if row else proposal_id

        db.update(
            "evolution_proposals",
            {"status": "approved", "resolved_at": time.time()},
            where="id = ?",
            where_params=(proposal_id,),
        )

        # Trigger execution (lazy import avoids a module-load cycle)
        if row:
            from self_evolution.evolution_executor import EvolutionExecutor
            executor = EvolutionExecutor()
            proposal = Proposal(
                id=row["id"],
                proposal_type=row["proposal_type"],
                title=row["title"],
                description=row["description"],
                expected_impact=row.get("expected_impact", ""),
                risk_assessment=row.get("risk_assessment", "low"),
                rollback_plan=row.get("rollback_plan", ""),
                status="approved",
            )
            executor.execute(proposal)

        return title

    def _modify(self, proposal_id: str, user_input: str) -> str:
        """Update proposal with user's modification. Returns title.

        The proposal stays in ``pending_approval`` so it reappears in the
        next card for final approval.
        """
        row = db.fetch_one("evolution_proposals", where="id = ?", params=(proposal_id,))
        title = row.get("title", proposal_id) if row else proposal_id

        db.update(
            "evolution_proposals",
            {"user_feedback": user_input, "status": "pending_approval"},
            where="id = ?",
            where_params=(proposal_id,),
        )
        return title

    def _reject(self, proposal_id: str, user_input: str) -> str:
        """Mark proposal as rejected and record reason for learning. Returns title."""
        row = db.fetch_one("evolution_proposals", where="id = ?", params=(proposal_id,))
        title = row.get("title", proposal_id) if row else proposal_id

        db.update(
            "evolution_proposals",
            {"status": "rejected", "user_feedback": user_input, "resolved_at": time.time()},
            where="id = ?",
            where_params=(proposal_id,),
        )
        # Record rejection for the dream engine to learn from
        db.insert("outcome_signals", {
            "session_id": f"evolution_rejection_{proposal_id}",
            "signal_type": "proposal_rejected",
            "signal_value": 0.0,
            "metadata": json.dumps({"proposal_id": proposal_id, "reason": user_input}, ensure_ascii=False),
        })
        return title

    def _build_card(self, proposals: List[dict], report: dict) -> dict:
        """Build Feishu interactive card JSON for the daily report."""
        date_str = time.strftime("%Y-%m-%d", time.localtime())
        elements = []

        # Overview section
        sessions_analyzed = report.get("sessions_analyzed", 0)
        avg_score = report.get("avg_score", 0)
        overview = (
            f"**日期**: {date_str}\n"
            f"**分析Sessions**: {sessions_analyzed}\n"
            f"**平均评分**: {avg_score:.3f}\n"
        )
        elements.append({
            "tag": "div",
            "text": {"tag": "lark_md", "content": overview},
        })

        # Optional analysis sections — rendered only when present
        error_summary = report.get("error_summary", "")
        if error_summary:
            elements.append({
                "tag": "div",
                "text": {"tag": "lark_md", "content": f"**错误分析**\n{error_summary}"},
            })

        waste_summary = report.get("waste_summary", "")
        if waste_summary:
            elements.append({
                "tag": "div",
                "text": {"tag": "lark_md", "content": f"**时间浪费分析**\n{waste_summary}"},
            })

        code_change_summary = report.get("code_change_summary", "")
        if code_change_summary:
            elements.append({
                "tag": "div",
                "text": {"tag": "lark_md", "content": f"**系统代码更新**\n{code_change_summary}"},
            })

        # Separator
        elements.append({"tag": "hr"})

        # Proposals — rendering shared with build_updated_card
        elements.extend(self._proposal_elements(proposals))

        return {
            "header": {
                "title": {"tag": "plain_text", "content": f"Hermes 每日进化报告 ({date_str})"},
                "template": "blue",
            },
            "elements": elements,
        }

    def _get_client(self):
        """Get or create a cached lark Client instance."""
        if self._client is None:
            # Lazy import keeps lark_oapi an optional dependency.
            import lark_oapi as lark
            self._client = (
                lark.Client.builder()
                .app_id(self.app_id)
                .app_secret(self.app_secret)
                .build()
            )
        return self._client

    def _send_card(self, card: dict):
        """Send an interactive card via Feishu.

        Prefers lark_oapi SDK (same as the gateway), falls back to REST.
        All failures are logged and swallowed — notification is best-effort.
        """
        try:
            receive_id, receive_id_type = self._resolve_target()
            if not receive_id:
                logger.warning("No Feishu receive target configured")
                return

            content_str = json.dumps(card, ensure_ascii=False)

            # Try SDK first (using cached client); fall through to REST on
            # ImportError or an unsuccessful SDK response.
            try:
                from lark_oapi.api.im.v1 import CreateMessageRequest, CreateMessageRequestBody

                client = self._get_client()

                body = CreateMessageRequestBody.builder() \
                    .receive_id(receive_id) \
                    .msg_type("interactive") \
                    .content(content_str) \
                    .build()

                request = CreateMessageRequest.builder() \
                    .receive_id_type(receive_id_type) \
                    .request_body(body) \
                    .build()

                response = client.im.v1.message.create(request)
                if response.success():
                    logger.info("Feishu card sent via SDK")
                    return
                logger.warning("Feishu SDK send failed: code=%s msg=%s", response.code, response.msg)
            except ImportError:
                pass

            # Fallback to REST API
            self._send_card_rest(receive_id, receive_id_type, content_str)

        except Exception as exc:
            logger.warning("Feishu notification failed: %s", exc)

    def _resolve_target(self) -> tuple:
        """Resolve (receive_id, receive_id_type) from env config.

        ``SELF_EVOLUTION_FEISHU_DELIVER=chat:<id>`` targets a chat directly;
        otherwise ``SELF_EVOLUTION_FEISHU_USER_ID`` is interpreted by its
        prefix: ``ou_`` → open_id, ``oc_`` → chat_id, anything else → user_id.
        Returns ("", "") when no target is configured.
        """
        deliver_to = os.getenv("SELF_EVOLUTION_FEISHU_DELIVER", "user")
        if deliver_to.startswith("chat:"):
            return deliver_to.replace("chat:", ""), "chat_id"
        user_id = os.getenv("SELF_EVOLUTION_FEISHU_USER_ID", "")
        if not user_id:
            return "", ""
        if user_id.startswith("ou_"):
            return user_id, "open_id"
        if user_id.startswith("oc_"):
            return user_id, "chat_id"
        return user_id, "user_id"

    def _send_card_rest(self, receive_id: str, receive_id_type: str, content: str):
        """Fallback: send card via REST API."""
        import requests

        token = self._get_tenant_token()
        if not token:
            logger.warning("Failed to get Feishu token")
            return

        resp = requests.post(
            "https://open.feishu.cn/open-apis/im/v1/messages",
            headers={"Authorization": f"Bearer {token}"},
            params={"receive_id_type": receive_id_type},
            json={"receive_id": receive_id, "msg_type": "interactive", "content": content},
            timeout=30,
        )
        if resp.status_code != 200:
            logger.warning("Feishu REST send failed: %s", resp.text)

    def _send_confirmation(self, proposal_id: str, message: str):
        """Send a simple confirmation message."""
        if not self.enabled:
            return
        card = {
            "elements": [
                {
                    "tag": "div",
                    "text": {
                        "tag": "lark_md",
                        "content": f"**提案 `{proposal_id}`**: {message}",
                    },
                },
            ],
        }
        self._send_card(card)

    def _get_tenant_token(self) -> Optional[str]:
        """Get Feishu tenant access token with caching (1.5h TTL)."""
        if self._token_cache is not None:
            token, expire_at = self._token_cache
            if time.time() < expire_at:
                return token
        try:
            import requests
            resp = requests.post(
                "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal",
                json={
                    "app_id": self.app_id,
                    "app_secret": self.app_secret,
                },
                timeout=10,
            )
            if resp.status_code == 200:
                token = resp.json().get("tenant_access_token")
                if token:
                    # Feishu tokens expire in ~2h; cache for 1.5h
                    self._token_cache = (token, time.time() + 5400)
                    return token
        except Exception as exc:
            logger.debug("Failed to get Feishu token: %s", exc)
        return None
|
||||
170
self_evolution/git_analyzer.py
Normal file
170
self_evolution/git_analyzer.py
Normal file
|
|
@ -0,0 +1,170 @@
|
|||
"""
|
||||
Self Evolution Plugin — Git Analysis
|
||||
=====================================
|
||||
|
||||
Analyzes git commit history for the dream consolidation engine.
|
||||
|
||||
Uses a single batched ``git log --stat --name-only`` call instead of
|
||||
25+ individual subprocess invocations.
|
||||
|
||||
Extracted from reflection_engine.py for single-responsibility.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import subprocess
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Dict
|
||||
|
||||
from self_evolution.models import CodeChangeAnalysis, CommitInfo
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def analyze_code_changes(hours: int = 24) -> CodeChangeAnalysis:
    """Analyze git commits from the previous period.

    Uses a single batched git log call with --stat --name-only
    instead of 25+ individual subprocess calls.

    Args:
        hours: Look-back window; commits older than this are ignored.

    Returns:
        Aggregated CodeChangeAnalysis; an empty instance when git is
        unavailable, the command fails, or no commits are found.
    """
    # Repo root is assumed to be the parent of this package's directory.
    project_root = str(Path(__file__).resolve().parent.parent)

    cutoff_epoch = time.time() - (hours * 3600)
    cutoff_date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(cutoff_epoch))

    try:
        # Single batched call: format + shortstat + name-only.
        # COMMITSTART / ENDHEADER are sentinel tokens understood by
        # _parse_batched_output; at most 15 non-merge commits are read.
        result = subprocess.run(
            ["git", "log",
             "--format=COMMITSTART%h%n%s%n%an%n%at%n%b%nENDHEADER",
             "--shortstat", "--name-only",
             "--no-merges", f"--since={cutoff_date}", "-15"],
            capture_output=True, text=True, timeout=30,
            cwd=project_root,
        )
        if result.returncode != 0 or not result.stdout.strip():
            return CodeChangeAnalysis()

        commits = _parse_batched_output(result.stdout)
        if not commits:
            return CodeChangeAnalysis()

        # Aggregate stats
        total_ins = sum(c.insertions for c in commits)
        total_del = sum(c.deletions for c in commits)
        total_files = sum(c.files_changed for c in commits)
        # dict.fromkeys deduplicates while preserving first-seen order.
        authors = list(dict.fromkeys(c.author for c in commits))

        # Categorize by conventional commit prefix
        categories: Dict[str, int] = {}
        for c in commits:
            cat = _categorize_commit(c.subject)
            categories[cat] = categories.get(cat, 0) + 1

        # Extract top-level module areas (first path segment), capped at 10.
        all_files = []
        for c in commits:
            all_files.extend(c.file_list)
        areas = list(dict.fromkeys(
            f.split("/")[0] for f in all_files
            if "/" in f and not f.startswith(".")
        ))[:10]

        return CodeChangeAnalysis(
            commits=commits,
            total_commits=len(commits),
            total_insertions=total_ins,
            total_deletions=total_del,
            total_files_changed=total_files,
            authors=authors,
            change_categories=categories,
            areas_touched=areas,
        )

    except (subprocess.SubprocessError, FileNotFoundError, OSError):
        # Best-effort: git may be absent or the tree may not be a repo.
        logger.debug("git analysis unavailable", exc_info=True)
        return CodeChangeAnalysis()
|
||||
|
||||
|
||||
def _parse_batched_output(stdout: str) -> list:
    """Parse the batched git log output into CommitInfo objects.

    Expected per-commit layout (see the --format in analyze_code_changes):
    ``COMMITSTART<hash>\\n<subject>\\n<author>\\n<unix-ts>\\n<body...>ENDHEADER``
    followed by the ``--name-only`` file list and the ``--shortstat`` line.
    Malformed commit chunks are skipped rather than raising.
    """
    commits = []
    raw_commits = stdout.split("COMMITSTART")
    for raw in raw_commits:
        raw = raw.strip()
        if not raw:
            continue

        header_end = raw.find("ENDHEADER")
        if header_end < 0:
            # Sentinel missing — chunk is truncated or garbled; skip it.
            continue
        header = raw[:header_end].strip()
        lines = header.split("\n")
        if len(lines) < 4:
            continue

        hash_short = lines[0].strip()
        subject = lines[1].strip()
        author = lines[2].strip()
        try:
            timestamp = float(lines[3].strip())
        except ValueError:
            continue
        # Commit body may span multiple lines; keep at most 500 chars.
        body = "\n".join(lines[4:]).strip()[:500]

        # After ENDHEADER: shortstat line(s) + file list
        rest = raw[header_end + len("ENDHEADER"):].strip()

        files_changed = 0
        insertions = 0
        deletions = 0
        file_list = []
        stat_done = False
        for rline in rest.split("\n"):
            rline = rline.strip()
            if not rline:
                continue
            if not stat_done and ("files changed" in rline or "file changed" in rline
                                  or "insertion" in rline or "deletion" in rline):
                # Shortstat line, e.g. "3 files changed, 10 insertions(+), 2 deletions(-)"
                files_changed = _parse_int(r'(\d+) files? changed', rline)
                insertions = _parse_int(r'(\d+) insertion', rline)
                deletions = _parse_int(r'(\d+) deletion', rline)
                stat_done = True
                continue
            # Heuristic: any remaining line containing '/' or '.' is treated
            # as a file path. NOTE(review): this can misclassify unusual
            # lines — confirm --name-only output never emits other text here.
            if "/" in rline or "." in rline:
                file_list.append(rline)

        commits.append(CommitInfo(
            hash_short=hash_short,
            subject=subject,
            body=body,
            author=author,
            timestamp=timestamp,
            files_changed=files_changed,
            insertions=insertions,
            deletions=deletions,
            file_list=file_list[:20],  # cap to keep rows small
        ))

    return commits
|
||||
|
||||
|
||||
# ── Helpers ───────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _parse_int(pattern: str, text: str) -> int:
|
||||
"""Extract first integer matching regex pattern from text."""
|
||||
m = re.search(pattern, text)
|
||||
return int(m.group(1)) if m else 0
|
||||
|
||||
|
||||
def _categorize_commit(subject: str) -> str:
|
||||
"""Categorize commit by conventional commit prefix."""
|
||||
s = subject.lower()
|
||||
for prefix in ("feat", "fix", "refactor", "test", "docs", "chore", "perf", "style", "ci", "build"):
|
||||
if s.startswith(prefix):
|
||||
return prefix
|
||||
return "other"
|
||||
200
self_evolution/hooks.py
Normal file
200
self_evolution/hooks.py
Normal file
|
|
@ -0,0 +1,200 @@
|
|||
"""
|
||||
Self Evolution Plugin — Lifecycle Hooks
|
||||
========================================
|
||||
|
||||
Registered hooks:
|
||||
|
||||
- post_tool_call: Collect per-tool telemetry
|
||||
- on_session_end: Compute quality score + detect outcome signals
|
||||
- pre_llm_call: Inject learned strategy hints
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ── Correction detection patterns (inspired by Claude Code conversation-analyzer) ──

# Matches user messages that explicitly correct or veto the agent's action.
# NOTE(review): the short English tokens ("no", "stop", ...) have no word
# boundaries, so they also match inside longer words (e.g. "normal",
# "know") — confirm whether \b anchors are wanted here.
CORRECTION_PATTERNS = re.compile(
    r"(不对|错误|重试|不要|停|stop|wrong|retry|no|don't|not that|不是|不是这个|为什么|换一种)",
    re.IGNORECASE,
)

# Matches user messages expressing frustration with pace or results.
# Fix: the original alternation listed 浪费时间 three times; duplicate
# branches are redundant for matching and have been collapsed to one.
FRUSTRATION_PATTERNS = re.compile(
    r"(烦|慢|太慢|浪费时间|why did you|无语|算了|够了)",
    re.IGNORECASE,
)
|
||||
|
||||
|
||||
# ── post_tool_call ───────────────────────────────────────────────────────
|
||||
|
||||
def on_tool_call(**kwargs) -> None:
    """Collect per-tool invocation telemetry.

    Expected kwargs (all optional, with safe defaults): ``tool_name``,
    ``started_at``, ``duration_ms``, ``success``, ``error_type``,
    ``session_id``, ``turn_number``.

    Inserts one row into the tool_invocations table. Insert failures are
    logged and swallowed so telemetry never breaks the agent loop.
    """
    # Local import keeps DB wiring off the module-import path.
    from self_evolution.db import insert

    tool_name = kwargs.get("tool_name", "unknown")
    started_at = kwargs.get("started_at", time.time())
    duration_ms = kwargs.get("duration_ms", 0)
    success = kwargs.get("success", True)
    # Only failed calls carry an error type; successes store NULL.
    error_type = kwargs.get("error_type") if not success else None
    session_id = kwargs.get("session_id", "")
    turn_number = kwargs.get("turn_number", 0)

    try:
        insert("tool_invocations", {
            "session_id": session_id,
            "tool_name": tool_name,
            "duration_ms": duration_ms,
            "success": success,
            "error_type": error_type,
            "turn_number": turn_number,
            "created_at": started_at,
        })
    except Exception as exc:
        logger.warning("telemetry insert failed: %s", exc)
|
||||
|
||||
|
||||
# ── on_session_end ───────────────────────────────────────────────────────
|
||||
|
||||
def on_session_end(**kwargs) -> None:
    """Compute quality score and detect outcome signals when session ends.

    Reads ``kwargs["session_data"]`` (a dict); a non-empty "session_id" is
    required for anything to be recorded. Persists one session_scores row
    and a batch of outcome_signals rows; DB failures are logged and
    swallowed so scoring never breaks session teardown.
    """
    # Local imports keep DB/scorer wiring off the module-import path.
    from self_evolution.db import insert, insert_many
    from self_evolution.quality_scorer import compute_score

    session_data = kwargs.get("session_data", {})
    session_id = session_data.get("session_id", "")

    if not session_id:
        # Nothing to attribute the score to — skip silently.
        return

    # Compute quality score
    score = compute_score(session_data)
    try:
        insert("session_scores", score.to_db_row())
    except Exception as exc:
        logger.warning("score insert failed: %s", exc)

    # Detect and batch-insert outcome signals
    signals = _detect_outcome_signals(session_data, kwargs)
    if signals:
        try:
            insert_many("outcome_signals", signals)
        except Exception as exc:
            logger.warning("signal insert failed: %s", exc)
|
||||
|
||||
|
||||
def _detect_outcome_signals(session_data: dict, kwargs: dict) -> list:
    """Detect implicit outcome signals from session behavior.

    Inspired by Claude Code conversation-analyzer's signal detection:
    - Explicit corrections: user says "不对", "重试"
    - Frustration signals: user says "为什么", "太慢"
    - Completion / interruption status
    - Budget exhaustion

    Returns a list of row dicts for the ``outcome_signals`` table.
    """
    import json

    signals = []
    session_id = session_data.get("session_id", "")

    def _signal(signal_type: str, value: float, metadata: str = "{}") -> dict:
        # All signal rows share the same shape; centralize it.
        return {
            "session_id": session_id,
            "signal_type": signal_type,
            "signal_value": value,
            "metadata": metadata,
        }

    # Completion signal — the three states are mutually exclusive, best first.
    if session_data.get("completed", False):
        signals.append(_signal("completed", 1.0))
    elif session_data.get("interrupted", False):
        signals.append(_signal("interrupted", 0.5))
    elif session_data.get("partial", False):
        signals.append(_signal("partial", 0.3))

    # Budget exhaustion
    max_iterations = session_data.get("max_iterations", 0)
    iterations = session_data.get("iterations", 0)
    if max_iterations and iterations >= max_iterations:
        signals.append(_signal(
            "budget_exhausted", 0.0,
            json.dumps({"iterations": iterations}),
        ))

    # User correction / frustration detection from messages.
    # At most one signal of each type per session.  Fixed vs. the naive
    # break-on-first-match version: a correction found first no longer
    # masks a later frustration signal (or vice versa).
    correction_found = False
    frustration_found = False
    for msg in session_data.get("messages", []):
        if msg.get("role") != "user":
            continue
        content = msg.get("content", "")
        if isinstance(content, list):
            # Flatten structured content to the concatenated text blocks.
            content = " ".join(
                block.get("text", "") for block in content
                if isinstance(block, dict) and block.get("type") == "text"
            )

        if not correction_found and CORRECTION_PATTERNS.search(content):
            correction_found = True
            # json.dumps yields valid JSON metadata (repr() in an f-string
            # produced Python-quoted, non-JSON text).
            signals.append(_signal(
                "correction", 0.2,
                json.dumps({"text": content[:100]}, ensure_ascii=False),
            ))
        if not frustration_found and FRUSTRATION_PATTERNS.search(content):
            frustration_found = True
            signals.append(_signal(
                "frustration", 0.1,
                json.dumps({"text": content[:100]}, ensure_ascii=False),
            ))
        if correction_found and frustration_found:
            break  # Both caps reached; no need to scan further messages.

    return signals
|
||||
|
||||
|
||||
# ── pre_llm_call ─────────────────────────────────────────────────────────
|
||||
|
||||
def on_pre_llm_call(**kwargs) -> Optional[Dict[str, Any]]:
    """Inject learned strategy hints into system prompt.

    Inspired by Claude Code learning-output-style SessionStart hook pattern:
    automatically inject behavioral context without user action.

    Returns ``{"system_hint": <text>}`` when a hint applies, else None.
    """
    from self_evolution.strategy_injector import inject_hints

    try:
        hint_text = inject_hints(kwargs)
    except Exception as exc:
        # Hint injection is best-effort; never block the LLM call.
        logger.warning("strategy injection failed: %s", exc)
        return None

    return {"system_hint": hint_text} if hint_text else None
|
||||
|
||||
|
||||
# ── Registration ─────────────────────────────────────────────────────────
|
||||
|
||||
def register_all(ctx) -> None:
    """Register all lifecycle hooks via PluginContext."""
    # Table-driven registration: one row per (event, handler) pair.
    hook_table = (
        ("post_tool_call", on_tool_call),
        ("on_session_end", on_session_end),
        ("pre_llm_call", on_pre_llm_call),
    )
    for event_name, handler in hook_table:
        ctx.register_hook(event_name, handler)
|
||||
248
self_evolution/model_config.py
Normal file
248
self_evolution/model_config.py
Normal file
|
|
@ -0,0 +1,248 @@
|
|||
"""
|
||||
Self Evolution Plugin — Model Configuration & Failover
|
||||
======================================================
|
||||
|
||||
Handles runtime model resolution (primary / fallback / multimodal)
|
||||
and thread-safe failover state management.
|
||||
|
||||
Extracted from reflection_engine.py for single-responsibility.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ── Model Configuration Resolution ────────────────────────────────────────
|
||||
|
||||
|
||||
def resolve_config() -> dict:
    """Resolve model config via hermes unified runtime provider.

    Returns dict with:
      base_url, api_key, model, provider — primary text model
      fallback: {base_url, api_key, model, provider} — fallback text model
      multimodal: {base_url, api_key, model, provider} — vision model
    Returns empty dict if no provider is available.
    """
    try:
        from hermes_cli.runtime_provider import resolve_runtime_provider
        from hermes_cli.config import load_config

        runtime = resolve_runtime_provider()
        config = load_config()
        # Config default is only used when the runtime reports no model.
        default_model = config.get("model", {}).get("default", "")

        return {
            "base_url": runtime.get("base_url", ""),
            "api_key": runtime.get("api_key", ""),
            "model": runtime.get("model", default_model),
            "provider": runtime.get("provider", ""),
            "fallback": _resolve_fallback_config(config),
            "multimodal": _resolve_multimodal_config(config),
        }
    except Exception:
        logger.warning("Failed to resolve runtime provider", exc_info=True)
        return {}
|
||||
|
||||
|
||||
def _resolve_fallback_config(config: Optional[dict] = None) -> dict:
    """Resolve fallback text model from config.yaml fallback_providers.

    Resolution order:
      1. Each entry of ``fallback_providers`` (first resolvable wins).
      2. Any ``custom_providers`` entry pointing at localhost, with its
         model auto-detected when not configured.
    Returns {} when nothing resolves.
    """
    try:
        from hermes_cli.runtime_provider import resolve_runtime_provider

        # Load config lazily so callers that already have it avoid a re-read.
        if config is None:
            from hermes_cli.config import load_config
            config = load_config()

        for fb in config.get("fallback_providers", []):
            fb_provider = (fb.get("provider") or "").strip()
            fb_model = (fb.get("model") or "").strip()
            if not fb_provider:
                continue
            try:
                rt = resolve_runtime_provider(requested=fb_provider)
                base_url = rt.get("base_url", "")
                api_key = rt.get("api_key", "")
                # Both an endpoint and an explicit model are required here.
                if base_url and fb_model:
                    return {
                        "base_url": base_url,
                        "api_key": api_key,
                        "model": fb_model,
                        "provider": rt.get("provider", ""),
                    }
            except Exception:
                # An unresolvable provider is skipped, not fatal.
                pass

        for cp in config.get("custom_providers", []):
            base_url = (cp.get("base_url") or cp.get("api", "")).strip()
            # Only local servers are considered as implicit fallbacks.
            if base_url and ("localhost" in base_url or "127.0.0.1" in base_url):
                model = (cp.get("model") or "").strip()
                if not model:
                    model = _detect_local_model(
                        base_url,
                        (cp.get("api_key") or "").strip(),
                    )
                # NOTE(review): "gemma-4-26b" is hard-excluded here —
                # presumably reserved as the vision model; confirm intent.
                if model and "gemma-4-26b" not in model.lower():
                    return {
                        "base_url": base_url.rstrip("/"),
                        "api_key": (cp.get("api_key") or "").strip(),
                        "model": model,
                        "provider": "custom",
                    }

        return {}
    except Exception:
        logger.warning("Failed to resolve fallback config", exc_info=True)
        return {}
|
||||
|
||||
|
||||
def _resolve_multimodal_config(config: Optional[dict] = None) -> dict:
    """Resolve multimodal (vision) model config.

    Resolution order:
      1. An explicit ``auxiliary.vision.provider`` (unless "auto").
      2. Any ``custom_providers`` entry pointing at localhost, with its
         model auto-detected when not configured.
    Returns {} when nothing resolves.
    """
    try:
        from hermes_cli.runtime_provider import resolve_runtime_provider

        # Load config lazily so callers that already have it avoid a re-read.
        if config is None:
            from hermes_cli.config import load_config
            config = load_config()

        aux = config.get("auxiliary", {})
        vision_cfg = aux.get("vision", {})
        vision_provider = (vision_cfg.get("provider") or "").strip().lower()
        if vision_provider and vision_provider != "auto":
            try:
                rt = resolve_runtime_provider(requested=vision_provider)
                if rt.get("base_url"):
                    return {
                        "base_url": rt.get("base_url", ""),
                        "api_key": rt.get("api_key", ""),
                        # Configured model wins over the runtime's default.
                        "model": vision_cfg.get("model") or rt.get("model", ""),
                        "provider": rt.get("provider", ""),
                    }
            except Exception:
                # Fall through to the localhost scan below.
                pass

        for cp in config.get("custom_providers", []):
            base_url = (cp.get("base_url") or cp.get("api", "")).strip()
            # Only local servers are considered as implicit vision hosts.
            if base_url and ("localhost" in base_url or "127.0.0.1" in base_url):
                api_key = (cp.get("api_key") or "").strip()
                key_env = (cp.get("key_env") or "").strip()
                # key_env names an environment variable holding the key.
                if not api_key and key_env:
                    import os
                    api_key = os.getenv(key_env, "")
                model = (cp.get("model") or "").strip()
                if not model:
                    model = _detect_local_model(base_url, api_key)
                if model:
                    return {
                        "base_url": base_url.rstrip("/"),
                        "api_key": api_key,
                        "model": model,
                        "provider": "custom",
                    }

        return {}
    except Exception:
        logger.warning("Failed to resolve multimodal config", exc_info=True)
        return {}
|
||||
|
||||
|
||||
# ── Failover State (thread-safe) ──────────────────────────────────────────
|
||||
|
||||
# Which model tier is currently in use: "primary" or "fallback".
_active_model: str = "primary"
# Timestamp of the last primary-health probe while failed over.
_last_health_check: float = 0.0
_HEALTH_CHECK_INTERVAL: int = 1800  # 30 minutes
# Guards all reads/writes of the failover state above.
_failover_lock = threading.Lock()
|
||||
|
||||
|
||||
def _check_primary_health(config: dict) -> bool:
|
||||
"""Quick health check: send a minimal request to the primary model."""
|
||||
try:
|
||||
import requests
|
||||
base_url = config.get("base_url", "")
|
||||
api_key = config.get("api_key", "")
|
||||
model = config.get("model", "")
|
||||
if not base_url or not model:
|
||||
return False
|
||||
resp = requests.post(
|
||||
f"{base_url.rstrip('/')}/chat/completions",
|
||||
headers={
|
||||
"Authorization": f"Bearer {api_key}",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
json={
|
||||
"model": model,
|
||||
"messages": [{"role": "user", "content": "OK"}],
|
||||
"max_tokens": 2,
|
||||
},
|
||||
timeout=15,
|
||||
)
|
||||
return resp.status_code == 200
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def get_active_text_config(config: dict) -> tuple:
    """Return (active_config_dict, is_fallback) based on failover state.

    While failed over, re-probes the primary at most once per
    ``_HEALTH_CHECK_INTERVAL`` and switches back when it recovers.
    """
    global _active_model, _last_health_check

    with _failover_lock:
        # Periodic recovery probe — only relevant while on fallback.
        if _active_model == "fallback":
            now = time.time()
            if now - _last_health_check >= _HEALTH_CHECK_INTERVAL:
                _last_health_check = now
                if _check_primary_health(config):
                    _active_model = "primary"
                    logger.info("Primary model recovered, switching back")
                else:
                    logger.info("Primary model still unavailable, staying on fallback")

        if _active_model == "primary":
            return config, False

        fallback = config.get("fallback", {})
        if fallback:
            return fallback, True
        # Failed over but no fallback configured: keep using the primary.
        return config, False
|
||||
|
||||
|
||||
def switch_to_fallback():
    """Mark primary as down and switch to fallback.

    Records the switch time so get_active_text_config() can schedule
    the next recovery probe.
    """
    global _active_model, _last_health_check
    with _failover_lock:
        _active_model = "fallback"
        _last_health_check = time.time()
        logger.warning("Primary model failed, switched to fallback")
|
||||
|
||||
|
||||
def _detect_local_model(base_url: str, api_key: str = "") -> str:
|
||||
"""Auto-detect a multimodal model from a local server."""
|
||||
try:
|
||||
import requests
|
||||
headers = {}
|
||||
if api_key:
|
||||
headers["Authorization"] = f"Bearer {api_key}"
|
||||
resp = requests.get(
|
||||
f"{base_url.rstrip('/')}/models",
|
||||
headers=headers, timeout=5,
|
||||
)
|
||||
if resp.ok:
|
||||
models = resp.json().get("data", [])
|
||||
multimodal_hints = ["gemma-4", "qwen2-vl", "qwen-vl", "llava", "pixtral", "vision"]
|
||||
for m in models:
|
||||
mid = m.get("id", "").lower()
|
||||
for hint in multimodal_hints:
|
||||
if hint in mid:
|
||||
return m["id"]
|
||||
except Exception:
|
||||
pass
|
||||
return ""
|
||||
363
self_evolution/models.py
Normal file
363
self_evolution/models.py
Normal file
|
|
@ -0,0 +1,363 @@
|
|||
"""
|
||||
Self Evolution Plugin — Data Models
|
||||
=====================================
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field, asdict
|
||||
from typing import Any, Dict, List, Optional
|
||||
import json
|
||||
import time
|
||||
|
||||
|
||||
def _now() -> float:
    """Return the current time as a Unix timestamp in seconds."""
    return time.time()
|
||||
|
||||
|
||||
def _ts() -> str:
    """Return the current local time formatted as 'YYYY-MM-DD HH:MM:SS'."""
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
|
||||
|
||||
|
||||
# ── Quality Scoring ──────────────────────────────────────────────────────
|
||||
|
||||
@dataclass
class QualityScore:
    """Composite quality score for a single session.

    Component scores are produced by quality_scorer; ``composite`` is
    their weighted combination.
    """

    session_id: str
    composite: float = 0.0
    completion_rate: float = 0.0
    efficiency_score: float = 0.0
    cost_efficiency: float = 0.0
    satisfaction_proxy: float = 0.0
    # e.g. "coding" / "web_research" / "file_analysis" / "general"
    task_category: str = ""
    model: str = ""
    created_at: float = field(default_factory=_now)

    def to_db_row(self) -> dict:
        """Serialize for the ``session_scores`` table.

        Note: the DB column is ``composite_score`` while the field is
        named ``composite``.
        """
        return {
            "session_id": self.session_id,
            "composite_score": self.composite,
            "completion_rate": self.completion_rate,
            "efficiency_score": self.efficiency_score,
            "cost_efficiency": self.cost_efficiency,
            "satisfaction_proxy": self.satisfaction_proxy,
            "task_category": self.task_category,
            "model": self.model,
            "created_at": self.created_at,
        }
|
||||
|
||||
|
||||
# ── Error Analysis ───────────────────────────────────────────────────────
|
||||
|
||||
@dataclass
class ToolFailure:
    """Aggregated failure record for one (tool, error type) pair."""

    tool_name: str
    error_type: str
    # Number of failed invocations observed.
    count: int
    sessions_affected: List[str] = field(default_factory=list)
    # One representative session id for drill-down.
    example_session: str = ""
|
||||
|
||||
|
||||
@dataclass
class RetryPattern:
    """Repeated attempts of the same tool within one session."""

    session_id: str
    tool_name: str
    attempt_count: int
    final_outcome: str  # "success" | "failure" | "abandoned"
|
||||
|
||||
|
||||
@dataclass
class ErrorAnalysis:
    """Aggregated error findings for one reflection period."""

    tool_failures: List[ToolFailure] = field(default_factory=list)
    retry_patterns: List[RetryPattern] = field(default_factory=list)
    incomplete_sessions: List[str] = field(default_factory=list)
    user_corrections: int = 0
    correction_examples: List[str] = field(default_factory=list)
    api_error_count: int = 0
    api_error_types: Dict[str, int] = field(default_factory=dict)

    def summary(self) -> str:
        """Render a short human-readable digest; "" when nothing to report."""
        parts: List[str] = []
        if self.tool_failures:
            parts.append(f"工具失败: {len(self.tool_failures)} 种工具出错")
            parts.extend(
                f"  - {tf.tool_name}: {tf.count}次 ({tf.error_type})"
                for tf in self.tool_failures[:5]
            )
        if self.retry_patterns:
            parts.append(f"重复重试: {len(self.retry_patterns)} 次")
        if self.incomplete_sessions:
            parts.append(f"未完成session: {len(self.incomplete_sessions)} 个")
        if self.user_corrections:
            parts.append(f"用户纠正: {self.user_corrections} 次")
        if self.api_error_count:
            parts.append(f"API错误: {self.api_error_count} 次")
        return "\n".join(parts)
|
||||
|
||||
|
||||
# ── Time Waste Analysis ──────────────────────────────────────────────────
|
||||
|
||||
@dataclass
class ToolDuration:
    """Aggregate latency statistics for one tool."""

    tool_name: str
    total_duration_ms: int
    call_count: int
    # Average latency per call in milliseconds.
    avg_duration_ms: float
|
||||
|
||||
|
||||
@dataclass
class RepeatedOperation:
    """An operation observed multiple times — potential wasted work."""

    description: str
    count: int
    sessions: List[str] = field(default_factory=list)
    # Estimated time spent on the repeats, in milliseconds.
    wasted_ms: int = 0
|
||||
|
||||
|
||||
@dataclass
class WasteAnalysis:
    """Aggregated time-waste findings for one reflection period."""

    slowest_tools: List[ToolDuration] = field(default_factory=list)
    repeated_operations: List[RepeatedOperation] = field(default_factory=list)
    inefficient_sessions: List[str] = field(default_factory=list)
    shortcut_opportunities: List[str] = field(default_factory=list)

    def summary(self) -> str:
        """Render a short human-readable digest; "" when nothing to report."""
        out: List[str] = []
        if self.slowest_tools:
            out.append("耗时最长的工具:")
            out.extend(
                f"  - {td.tool_name}: 平均{td.avg_duration_ms:.0f}ms ({td.call_count}次)"
                for td in self.slowest_tools[:5]
            )
        if self.repeated_operations:
            out.append(f"重复操作: {len(self.repeated_operations)} 种")
            out.extend(
                f"  - {ro.description}: {ro.count}次"
                for ro in self.repeated_operations[:5]
            )
        if self.inefficient_sessions:
            out.append(f"低效session: {len(self.inefficient_sessions)} 个")
        if self.shortcut_opportunities:
            out.append(f"可优化路径: {len(self.shortcut_opportunities)} 个")
        return "\n".join(out)
|
||||
|
||||
|
||||
# ── Code Change Analysis ──────────────────────────────────────────────────
|
||||
|
||||
@dataclass
class CommitInfo:
    """Parsed metadata for a single git commit."""

    # Abbreviated commit hash.
    hash_short: str
    # First line of the commit message.
    subject: str
    body: str = ""
    author: str = ""
    # Commit time; presumably epoch seconds — see git_analyzer for the source.
    timestamp: float = 0.0
    files_changed: int = 0
    insertions: int = 0
    deletions: int = 0
    # Paths touched by the commit.
    file_list: List[str] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
class CodeChangeAnalysis:
    """Aggregated git activity for one reflection period."""

    commits: List[CommitInfo] = field(default_factory=list)
    total_commits: int = 0
    total_insertions: int = 0
    total_deletions: int = 0
    total_files_changed: int = 0
    authors: List[str] = field(default_factory=list)
    change_categories: Dict[str, int] = field(default_factory=dict)
    areas_touched: List[str] = field(default_factory=list)

    def summary(self) -> str:
        """Render a short human-readable digest of the period's commits."""
        if not self.commits:
            return "代码更新: 无新提交"
        report: List[str] = [
            f"代码更新: {self.total_commits} commits, "
            f"+{self.total_insertions}/-{self.total_deletions} lines, "
            f"{self.total_files_changed} files changed",
        ]
        if self.change_categories:
            cats = ", ".join(f"{k}: {v}" for k, v in self.change_categories.items())
            report.append(f"提交类型分布: {cats}")
        if self.areas_touched:
            report.append(f"涉及模块: {', '.join(self.areas_touched)}")
        report.append("主要变更:")
        report.extend(
            f"  - {c.subject} ({c.hash_short}, +{c.insertions}/-{c.deletions})"
            for c in self.commits[:8]
        )
        return "\n".join(report)
|
||||
|
||||
|
||||
# ── Reflection Report ────────────────────────────────────────────────────
|
||||
|
||||
@dataclass
class ReflectionReport:
    """Output of one nightly reflection run over a time window."""

    period_start: float
    period_end: float
    sessions_analyzed: int = 0
    avg_score: float = 0.0
    error_summary: str = ""
    waste_summary: str = ""
    worst_patterns: List[str] = field(default_factory=list)
    best_patterns: List[str] = field(default_factory=list)
    tool_insights: Dict[str, Dict[str, Any]] = field(default_factory=dict)
    recommendations: List[str] = field(default_factory=list)
    code_change_summary: str = ""
    # Which LLM produced the analysis, if any.
    model_used: str = ""
    created_at: float = field(default_factory=_now)

    def to_db_row(self) -> dict:
        """Serialize to a flat DB row; list/dict fields are JSON-encoded."""
        return {
            "period_start": self.period_start,
            "period_end": self.period_end,
            "sessions_analyzed": self.sessions_analyzed,
            "avg_score": self.avg_score,
            "error_summary": self.error_summary,
            "waste_summary": self.waste_summary,
            "worst_patterns": json.dumps(self.worst_patterns, ensure_ascii=False),
            "best_patterns": json.dumps(self.best_patterns, ensure_ascii=False),
            "tool_insights": json.dumps(self.tool_insights, ensure_ascii=False),
            "recommendations": json.dumps(self.recommendations, ensure_ascii=False),
            "code_change_summary": self.code_change_summary,
            "model_used": self.model_used,
            "created_at": self.created_at,
        }
|
||||
|
||||
|
||||
# ── Evolution Proposal ───────────────────────────────────────────────────
|
||||
|
||||
@dataclass
class Proposal:
    """An evolution proposal awaiting (or past) user approval."""

    id: str
    proposal_type: str  # skill | strategy | memory | tool_preference | code_improvement
    title: str
    description: str
    expected_impact: str = ""
    risk_assessment: str = "low"
    rollback_plan: str = ""
    status: str = "pending_approval"
    # Originating ReflectionReport row id, if any.
    report_id: Optional[int] = None
    user_feedback: str = ""
    created_at: float = field(default_factory=_now)
    # Resolution time; None while the proposal is still pending.
    resolved_at: Optional[float] = None

    def to_db_row(self) -> dict:
        """Serialize to a flat dict for the proposals table."""
        return {
            "id": self.id,
            "report_id": self.report_id,
            "proposal_type": self.proposal_type,
            "title": self.title,
            "description": self.description,
            "expected_impact": self.expected_impact,
            "risk_assessment": self.risk_assessment,
            "rollback_plan": self.rollback_plan,
            "status": self.status,
            "user_feedback": self.user_feedback,
            "created_at": self.created_at,
            "resolved_at": self.resolved_at,
        }
|
||||
|
||||
|
||||
# ── Improvement Unit (A/B Test Tracking) ─────────────────────────────────
|
||||
|
||||
@dataclass
class ImprovementUnit:
    """A/B-test tracking record for one applied improvement."""

    id: str
    proposal_id: str
    change_type: str
    version: int = 0
    baseline_score: float = 0.0
    current_score: float = 0.0
    sessions_sampled: int = 0
    min_sessions: int = 10
    min_improvement: float = 0.05
    max_regression: float = 0.10
    status: str = "active"  # active | promoted | reverted
    created_at: float = field(default_factory=_now)
    resolved_at: Optional[float] = None

    @property
    def should_revert(self) -> bool:
        """True once at least 3 sessions show a regression beyond the cap."""
        if self.sessions_sampled < 3:
            return False
        regression = self.baseline_score - self.current_score
        return regression > self.max_regression

    @property
    def should_promote(self) -> bool:
        """True once enough sessions show a sufficient improvement."""
        if self.sessions_sampled < self.min_sessions:
            return False
        gain = self.current_score - self.baseline_score
        return gain >= self.min_improvement

    def to_db_row(self) -> dict:
        """Serialize to a flat dict; keys mirror the field names exactly."""
        return asdict(self)
|
||||
|
||||
|
||||
# ── Strategy Rule ────────────────────────────────────────────────────────
|
||||
|
||||
@dataclass
class StrategyCondition:
    """A single predicate evaluated against a session attribute.

    NOTE: the attribute name ``field`` shadows ``dataclasses.field``
    inside this class body; harmless here since no field defaults are
    declared in this class.
    """

    # Name of the session attribute the condition inspects.
    field: str
    operator: str  # regex_match | contains | equals | not_contains
    # Value or pattern the operator is applied with.
    pattern: str
|
||||
|
||||
|
||||
@dataclass
class StrategyRule:
    """A learned (or manual) behavioral rule injected as a prompt hint."""

    id: str
    name: str
    strategy_type: str  # hint | avoid | prefer
    description: str
    conditions: List[StrategyCondition] = field(default_factory=list)
    hint_text: str = ""
    severity: str = "medium"  # high | medium | low
    enabled: bool = True
    version: int = 1
    source: str = "learned"  # learned | manual | default
    created_at: float = field(default_factory=_now)

    def to_dict(self) -> dict:
        """Serialize recursively; conditions become plain dicts."""
        return asdict(self)

    @classmethod
    def from_dict(cls, d: dict) -> StrategyRule:
        """Rebuild a rule from its to_dict() representation."""
        parsed = [
            StrategyCondition(
                field=raw["field"],
                operator=raw["operator"],
                pattern=raw["pattern"],
            )
            for raw in d.get("conditions", [])
        ]
        # Optional fields fall back to the dataclass defaults.
        optional_defaults = {
            "strategy_type": "hint",
            "description": "",
            "hint_text": "",
            "severity": "medium",
            "enabled": True,
            "version": 1,
            "source": "learned",
        }
        extras = {key: d.get(key, dflt) for key, dflt in optional_defaults.items()}
        return cls(
            id=d["id"],
            name=d["name"],
            conditions=parsed,
            created_at=d.get("created_at", _now()),
            **extras,
        )
|
||||
17
self_evolution/paths.py
Normal file
17
self_evolution/paths.py
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
"""
|
||||
Self Evolution Plugin — Centralized Path Definitions
|
||||
=====================================================
|
||||
|
||||
Single source of truth for all filesystem paths used by the plugin.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
# Root of all hermes per-user state.
HERMES_HOME = Path.home() / ".hermes"
# Plugin-private data directory.
DATA_DIR = HERMES_HOME / "self_evolution"
# Evolution database file (see self_evolution.db for access helpers).
DB_PATH = DATA_DIR / "evolution.db"
# Persisted strategy rules (JSON).
STRATEGIES_FILE = DATA_DIR / "strategies.json"
# Archived artifacts.
ARCHIVE_DIR = DATA_DIR / "archive"
# Skills promoted by the evolution process.
SKILLS_DIR = HERMES_HOME / "skills" / "learned"
MEMORIES_DIR = HERMES_HOME / "memories"
# Scheduled-job definitions.
CRON_DIR = HERMES_HOME / "cron"
|
||||
7
self_evolution/plugin.yaml
Normal file
7
self_evolution/plugin.yaml
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
# Plugin manifest for the self_evolution plugin.
name: self_evolution
version: "1.0.0"
description: "Agent self-optimization and continuous evolution — dream consolidation at 1:00, Feishu approval at 19:00"
# Lifecycle hooks this plugin implements; names must match the
# registered handlers (on_tool_call, on_session_end, on_pre_llm_call).
provides_hooks:
  - post_tool_call
  - on_session_end
  - pre_llm_call
|
||||
7
self_evolution/prompts/reflection.md
Normal file
7
self_evolution/prompts/reflection.md
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
## 概况
|
||||
- 时段: {period_range}
|
||||
- Session 数: {sessions_count}, 平均质量: {avg_score}
|
||||
- 工具调用: {total_invocations} 次, 成功率 {success_rate}%
|
||||
|
||||
## 数据
|
||||
{data_json}
|
||||
177
self_evolution/quality_scorer.py
Normal file
177
self_evolution/quality_scorer.py
Normal file
|
|
@ -0,0 +1,177 @@
|
|||
"""
|
||||
Self Evolution Plugin — Quality Scorer
|
||||
=======================================
|
||||
|
||||
Computes a composite quality score for each session:
|
||||
|
||||
session_quality = 0.4 * completion_rate
|
||||
+ 0.2 * efficiency_score
|
||||
+ 0.15 * cost_efficiency
|
||||
+ 0.25 * satisfaction_proxy
|
||||
|
||||
Zero API cost — pure computation from already-collected session data.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict
|
||||
|
||||
from self_evolution.models import QualityScore
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ── Weights ──────────────────────────────────────────────────────────────
|
||||
|
||||
# Composite-score weights; they sum to 1.0 (see module docstring).
W_COMPLETION = 0.40
W_EFFICIENCY = 0.20
W_COST = 0.15
W_SATISFACTION = 0.25

# Ideal iteration counts by task complexity (used by _efficiency_score).
IDEAL_ITERATIONS = {
    "simple": 3,
    "medium": 8,
    "complex": 15,
}
# Fallback when the detected task category has no entry above.
DEFAULT_IDEAL_ITERATIONS = 8
|
||||
|
||||
|
||||
def compute_score(session_data: dict) -> QualityScore:
    """Compute a composite quality score from session data.

    Args:
        session_data: dict with keys like:
            - completed, interrupted, partial
            - iterations, max_iterations
            - tool_call_count, message_count
            - input_tokens, output_tokens, estimated_cost_usd
            - duration_seconds
            - model, platform
            - messages (list)

    Returns:
        QualityScore with individual and composite scores.
    """
    # Each component is in [0, 1]; the composite is their weighted sum.
    completion = _completion_rate(session_data)
    efficiency = _efficiency_score(session_data)
    cost = _cost_efficiency(session_data)
    satisfaction = _satisfaction_proxy(session_data)

    weighted = (
        W_COMPLETION * completion
        + W_EFFICIENCY * efficiency
        + W_COST * cost
        + W_SATISFACTION * satisfaction
    )

    return QualityScore(
        session_id=session_data.get("session_id", ""),
        composite=round(weighted, 3),
        completion_rate=round(completion, 3),
        efficiency_score=round(efficiency, 3),
        cost_efficiency=round(cost, 3),
        satisfaction_proxy=round(satisfaction, 3),
        task_category=_detect_task_category(session_data),
        model=session_data.get("model", ""),
    )
|
||||
|
||||
|
||||
# ── Individual Score Components ──────────────────────────────────────────
|
||||
|
||||
def _completion_rate(session_data: dict) -> float:
|
||||
"""1.0 if completed, 0.5 if interrupted, 0.0 if failed."""
|
||||
if session_data.get("completed"):
|
||||
return 1.0
|
||||
if session_data.get("interrupted"):
|
||||
return 0.5
|
||||
if session_data.get("partial"):
|
||||
return 0.3
|
||||
return 0.0
|
||||
|
||||
|
||||
def _efficiency_score(session_data: dict) -> float:
    """Ratio of ideal to actual iterations, capped at 1.0."""
    actual = session_data.get("iterations", 0) or session_data.get("tool_call_count", 0)
    if actual <= 0:
        # No work recorded — treat as maximally efficient.
        return 1.0
    ideal = IDEAL_ITERATIONS.get(
        _detect_task_category(session_data), DEFAULT_IDEAL_ITERATIONS
    )
    return min(1.0, ideal / max(actual, 1))
|
||||
|
||||
|
||||
def _cost_efficiency(session_data: dict) -> float:
|
||||
"""Baseline cost / actual cost, capped at 1.0.
|
||||
|
||||
Uses message count as a proxy for expected work.
|
||||
"""
|
||||
messages = session_data.get("message_count", 1) or 1
|
||||
tool_calls = session_data.get("tool_call_count", 0) or 0
|
||||
iterations = session_data.get("iterations", 0) or 0
|
||||
|
||||
# Expected: roughly 2 tool calls per user message
|
||||
expected_tool_calls = messages * 2
|
||||
|
||||
if expected_tool_calls <= 0:
|
||||
return 1.0
|
||||
|
||||
return min(1.0, expected_tool_calls / max(tool_calls, 1))
|
||||
|
||||
|
||||
def _satisfaction_proxy(session_data: dict) -> float:
|
||||
"""Estimate satisfaction from behavioral signals.
|
||||
|
||||
Signals:
|
||||
- Single-turn session (user got what they needed) = high
|
||||
- Multi-turn but completed = medium-high
|
||||
- User corrections detected = lower
|
||||
- Budget exhausted = low
|
||||
"""
|
||||
messages = session_data.get("message_count", 1) or 1
|
||||
completed = session_data.get("completed", False)
|
||||
max_iterations = session_data.get("max_iterations", 0)
|
||||
iterations = session_data.get("iterations", 0)
|
||||
|
||||
score = 0.7 # baseline
|
||||
|
||||
# Single-turn completion is a strong positive signal
|
||||
if messages <= 2 and completed:
|
||||
score = 0.9
|
||||
elif completed:
|
||||
score = 0.75
|
||||
elif messages > 10:
|
||||
score = 0.5
|
||||
|
||||
# Budget exhaustion is a negative signal
|
||||
if max_iterations and iterations >= max_iterations:
|
||||
score -= 0.2
|
||||
|
||||
return max(0.0, min(1.0, score))
|
||||
|
||||
|
||||
# ── Task Category Detection ──────────────────────────────────────────────
|
||||
|
||||
def _detect_task_category(session_data: dict) -> str:
|
||||
"""Detect task category from tool usage patterns."""
|
||||
tool_names = session_data.get("tool_names", [])
|
||||
if isinstance(tool_names, str):
|
||||
tool_names = tool_names.split(",")
|
||||
|
||||
tool_set = set(t.lower() for t in tool_names) if tool_names else set()
|
||||
|
||||
coding_tools = {"terminal", "bash", "write", "edit", "file_write", "file_edit"}
|
||||
web_tools = {"web_search", "browser", "browser_navigate", "scrape", "fetch"}
|
||||
file_tools = {"read", "file_read", "grep", "glob", "find"}
|
||||
|
||||
if tool_set & coding_tools:
|
||||
return "coding"
|
||||
if tool_set & web_tools:
|
||||
return "web_research"
|
||||
if tool_set & file_tools:
|
||||
return "file_analysis"
|
||||
|
||||
return "general"
|
||||
751
self_evolution/reflection_engine.py
Normal file
751
self_evolution/reflection_engine.py
Normal file
|
|
@ -0,0 +1,751 @@
|
|||
"""
|
||||
Self Evolution Plugin — Dream Engine (Reflection Engine)
|
||||
=========================================================
|
||||
|
||||
Runs nightly at 1:00 to analyze the previous day's sessions.
|
||||
|
||||
Design reference: Claude Code plugins/hookify/agents/conversation-analyzer.md
|
||||
- Analyzes conversations in reverse chronological order
|
||||
- Detects: corrections, frustrations, repeated issues, reversions
|
||||
- Extracts tool usage patterns, converts to actionable rules
|
||||
- Categorizes by severity
|
||||
|
||||
We extend this pattern with:
|
||||
- Full automated analysis (not just on user request)
|
||||
- Error analysis (tool failures, retries, API errors)
|
||||
- Time waste analysis (slow tools, repeated ops, inefficient sessions)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from self_evolution import db
|
||||
from self_evolution.model_config import resolve_config, get_active_text_config, switch_to_fallback
|
||||
from self_evolution.git_analyzer import analyze_code_changes
|
||||
from self_evolution.models import (
|
||||
ErrorAnalysis, ToolFailure, RetryPattern,
|
||||
WasteAnalysis, ToolDuration, RepeatedOperation,
|
||||
CodeChangeAnalysis, CommitInfo,
|
||||
ReflectionReport,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ── Backward-compatible aliases ────────────────────────────────────────────
# These are used by cron_jobs.py and other callers.
# They re-export the model_config helpers under their historical private
# names so existing call sites keep working.
_resolve_runtime_config = resolve_config
_get_active_text_config = get_active_text_config
_switch_to_fallback = switch_to_fallback
|
||||
|
||||
|
||||
class DreamEngine:
    """Nightly dream consolidation engine.

    Analyzes the previous day's sessions to find:
    1. Error patterns (tool failures, retries, incomplete tasks)
    2. Time waste patterns (slow tools, repeated operations, inefficient flows)
    3. Success patterns (what worked well)
    4. Generates actionable evolution proposals
    """

    def __init__(self, config: dict = None):
        """Initialize the engine.

        Args:
            config: Runtime model configuration dict; when omitted it is
                resolved via _resolve_runtime_config().
        """
        self.config = config or _resolve_runtime_config()
        # Not used in this module; presumably reserved for a cached model
        # client instance — TODO confirm against other callers.
        self._model_client = None
        # Last prompt passed to _call_model; read by _call_chat_completions.
        self._current_prompt = ""
|
||||
|
||||
    def run(self, hours: int = 24, max_runtime_seconds: int = 0) -> Optional[ReflectionReport]:
        """Main dream consolidation flow.

        Args:
            hours: Analyze data from the last N hours.
            max_runtime_seconds: Hard timeout in seconds. 0 = no limit.
                If exceeded, stops at the next step boundary and returns None.

        Returns:
            The stored ReflectionReport on success; None when there is
            nothing to analyze, the deadline passes, the model returns an
            empty reflection, or any unexpected error occurs (errors are
            logged, never raised).
        """
        logger.info("Dream engine starting — analyzing last %d hours", hours)

        # deadline == 0 disables all timeout checks below.
        deadline = time.time() + max_runtime_seconds if max_runtime_seconds > 0 else 0

        now = time.time()
        cutoff = now - (hours * 3600)

        try:
            # 1. Load session data for the analysis window.
            scores = db.fetch_all(
                "session_scores",
                where="created_at >= ?",
                params=(cutoff,),
                order_by="created_at DESC",
            )
            tool_invocations = db.fetch_all(
                "tool_invocations",
                where="created_at >= ?",
                params=(cutoff,),
                order_by="created_at DESC",
            )
            signals = db.fetch_all(
                "outcome_signals",
                where="created_at >= ?",
                params=(cutoff,),
            )

            if not scores:
                logger.info("No sessions to analyze")
                return None

            # 2. Error analysis
            if deadline and time.time() > deadline:
                logger.warning("Dream engine timed out before error analysis")
                return None
            error_analysis = self._analyze_errors(scores, tool_invocations, signals)
            logger.info("Error analysis: %s", error_analysis.summary())

            # 3. Time waste analysis
            if deadline and time.time() > deadline:
                logger.warning("Dream engine timed out before waste analysis")
                return None
            waste_analysis = self._analyze_time_waste(scores, tool_invocations)
            logger.info("Waste analysis: %s", waste_analysis.summary())

            # 3.5. Code change analysis (git history over the same window)
            if deadline and time.time() > deadline:
                logger.warning("Dream engine timed out before code analysis")
                return None
            code_analysis = analyze_code_changes(hours=hours)
            logger.info("Code change analysis: %d commits found", code_analysis.total_commits)

            # 4. Compute average score
            avg_score = (
                sum(s.get("composite_score", 0) for s in scores) / len(scores)
                if scores else 0
            )

            # 5. Build reflection prompt
            if deadline and time.time() > deadline:
                logger.warning("Dream engine timed out before model call")
                return None
            prompt = self._build_reflection_prompt(
                scores, tool_invocations, signals,
                error_analysis, waste_analysis, avg_score,
                code_analysis=code_analysis,
            )

            # 6. Call model for deep reflection (with failover, see _call_model)
            reflection_text = self._call_model(prompt)
            if not reflection_text:
                logger.warning("Model returned empty reflection")
                return None

            # 7. Parse reflection report
            report = self._parse_reflection(
                reflection_text=reflection_text,
                period_start=cutoff,
                period_end=now,
                sessions_analyzed=len(scores),
                avg_score=avg_score,
                error_analysis=error_analysis,
                waste_analysis=waste_analysis,
                code_analysis=code_analysis,
            )

            # 8. Store report
            report_id = db.insert("reflection_reports", report.to_db_row())
            logger.info("Reflection report saved: id=%d, avg_score=%.3f", report_id, avg_score)

            # 9. Generate evolution proposals
            # NOTE(review): imported locally — presumably to avoid a circular
            # import with evolution_proposer; confirm.
            from self_evolution.evolution_proposer import generate_proposals
            proposals = generate_proposals(report, report_id)
            for p in proposals:
                db.insert("evolution_proposals", p.to_db_row())
            logger.info("Generated %d evolution proposals", len(proposals))

            # 10. Compress existing strategies (best-effort; failures only warn)
            try:
                from self_evolution.strategy_compressor import compress_strategies
                from self_evolution.strategy_store import StrategyStore
                store = StrategyStore()
                data = store.load()
                rules = data.get("rules", [])
                compressed = compress_strategies(rules)
                if len(compressed) < len(rules):
                    data["rules"] = compressed
                    store.save(data)
                    logger.info("Strategies compressed: %d → %d", len(rules), len(compressed))
            except Exception as exc:
                logger.warning("Strategy compression failed: %s", exc)

            # 11. Cleanup old data
            db.cleanup(days=30)

            return report

        except Exception as exc:
            # Top-level boundary: log and swallow so the nightly cron job
            # never crashes the host process.
            logger.exception("Dream engine failed: %s", exc)
            return None
|
||||
|
||||
# ── Error Analysis ────────────────────────────────────────────────────
|
||||
|
||||
def _analyze_errors(
|
||||
self,
|
||||
scores: List[dict],
|
||||
invocations: List[dict],
|
||||
signals: List[dict],
|
||||
) -> ErrorAnalysis:
|
||||
"""Analyze all errors in the period.
|
||||
|
||||
Inspired by Claude Code conversation-analyzer's signal detection.
|
||||
"""
|
||||
# Tool failures
|
||||
failures = {}
|
||||
for inv in invocations:
|
||||
if not inv.get("success", True):
|
||||
tool = inv.get("tool_name", "unknown")
|
||||
error_type = inv.get("error_type", "unknown")
|
||||
key = f"{tool}:{error_type}"
|
||||
if key not in failures:
|
||||
failures[key] = ToolFailure(
|
||||
tool_name=tool,
|
||||
error_type=error_type,
|
||||
count=0,
|
||||
sessions_affected=[],
|
||||
example_session=inv.get("session_id", ""),
|
||||
)
|
||||
failures[key].count += 1
|
||||
sid = inv.get("session_id", "")
|
||||
if sid and sid not in failures[key].sessions_affected:
|
||||
failures[key].sessions_affected.append(sid)
|
||||
|
||||
# Retry patterns (same tool called > 2 times in same session)
|
||||
retries = self._detect_retry_patterns(invocations)
|
||||
|
||||
# Incomplete sessions
|
||||
incomplete = [
|
||||
s.get("session_id", "") for s in scores
|
||||
if s.get("completion_rate", 1.0) < 0.5
|
||||
]
|
||||
|
||||
# User corrections from signals
|
||||
corrections = [s for s in signals if s.get("signal_type") == "correction"]
|
||||
frustration = [s for s in signals if s.get("signal_type") == "frustration"]
|
||||
api_errors = [s for s in signals if s.get("signal_type") == "api_error"]
|
||||
|
||||
# API error type distribution
|
||||
api_error_types: Dict[str, int] = {}
|
||||
for s in api_errors:
|
||||
meta = json.loads(s.get("metadata", "{}"))
|
||||
etype = meta.get("error_type", "unknown")
|
||||
api_error_types[etype] = api_error_types.get(etype, 0) + 1
|
||||
|
||||
return ErrorAnalysis(
|
||||
tool_failures=sorted(failures.values(), key=lambda x: x.count, reverse=True),
|
||||
retry_patterns=retries,
|
||||
incomplete_sessions=incomplete,
|
||||
user_corrections=len(corrections),
|
||||
correction_examples=[s.get("metadata", "") for s in corrections[:3]],
|
||||
api_error_count=len(api_errors),
|
||||
api_error_types=api_error_types,
|
||||
)
|
||||
|
||||
def _detect_retry_patterns(self, invocations: List[dict]) -> List[RetryPattern]:
|
||||
"""Detect tools called > 2 times in same session."""
|
||||
session_tools: Dict[str, Dict[str, int]] = {}
|
||||
for inv in invocations:
|
||||
sid = inv.get("session_id", "")
|
||||
tool = inv.get("tool_name", "")
|
||||
if sid not in session_tools:
|
||||
session_tools[sid] = {}
|
||||
session_tools[sid][tool] = session_tools[sid].get(tool, 0) + 1
|
||||
|
||||
patterns = []
|
||||
for sid, tools in session_tools.items():
|
||||
for tool, count in tools.items():
|
||||
if count > 2:
|
||||
patterns.append(RetryPattern(
|
||||
session_id=sid,
|
||||
tool_name=tool,
|
||||
attempt_count=count,
|
||||
final_outcome="unknown",
|
||||
))
|
||||
return sorted(patterns, key=lambda x: x.attempt_count, reverse=True)[:20]
|
||||
|
||||
# ── Time Waste Analysis ───────────────────────────────────────────────
|
||||
|
||||
    def _analyze_time_waste(
        self,
        scores: List[dict],
        invocations: List[dict],
    ) -> WasteAnalysis:
        """Analyze time waste patterns.

        Args:
            scores: session_scores rows for the period.
            invocations: tool_invocations rows for the period.

        Returns:
            WasteAnalysis with the 10 slowest tools (by avg duration), the
            10 most-repeated per-session operations, and sessions whose
            efficiency_score fell below 0.3.
        """
        # Slowest tools: collect per-tool duration samples (rows without a
        # duration are skipped).
        tool_durations: Dict[str, List[int]] = {}
        for inv in invocations:
            tool = inv.get("tool_name", "")
            duration = inv.get("duration_ms", 0)
            if not duration:
                continue
            if tool not in tool_durations:
                tool_durations[tool] = []
            tool_durations[tool].append(duration)

        slowest = [
            ToolDuration(
                tool_name=tool,
                total_duration_ms=sum(durs),
                call_count=len(durs),
                avg_duration_ms=sum(durs) / len(durs),
            )
            for tool, durs in tool_durations.items()
        ]
        slowest.sort(key=lambda x: x.avg_duration_ms, reverse=True)

        # Repeated operations (same tool + same session > 3 times)
        session_tool_calls: Dict[str, Dict[str, int]] = {}
        for inv in invocations:
            sid = inv.get("session_id", "")
            tool = inv.get("tool_name", "")
            if sid not in session_tool_calls:
                session_tool_calls[sid] = {}
            session_tool_calls[sid][tool] = session_tool_calls[sid].get(tool, 0) + 1

        repeated = []
        for sid, tools in session_tool_calls.items():
            for tool, count in tools.items():
                if count > 3:
                    # NOTE(review): wasted_ms uses the tool's FIRST recorded
                    # duration across all sessions as the per-call estimate,
                    # charging (count - 2) calls — a rough proxy; consider
                    # using this session's own average instead.
                    repeated.append(RepeatedOperation(
                        description=f"{tool} called {count} times",
                        count=count,
                        sessions=[sid],
                        wasted_ms=tool_durations.get(tool, [0])[0] * (count - 2) if tool in tool_durations else 0,
                    ))

        # Inefficient sessions (low efficiency score)
        inefficient = [
            s.get("session_id", "") for s in scores
            if s.get("efficiency_score", 1.0) < 0.3
        ]

        return WasteAnalysis(
            slowest_tools=slowest[:10],
            repeated_operations=sorted(repeated, key=lambda x: x.count, reverse=True)[:10],
            inefficient_sessions=inefficient,
            shortcut_opportunities=[],
        )
|
||||
|
||||
# ── Reflection Prompt ─────────────────────────────────────────────────
|
||||
|
||||
    def _build_reflection_prompt(
        self,
        scores: List[dict],
        invocations: List[dict],
        signals: List[dict],
        errors: ErrorAnalysis,
        waste: WasteAnalysis,
        avg_score: float,
        code_analysis: CodeChangeAnalysis = None,
    ) -> str:
        """Build the reflection prompt as structured JSON data.

        All analysis results are serialized as JSON so the model receives
        lossless data instead of pre-summarized text.

        Args:
            scores: session_scores rows.
            invocations: tool_invocations rows.
            signals: outcome_signals rows.
            errors: Result of _analyze_errors.
            waste: Result of _analyze_time_waste.
            avg_score: Mean composite score over ``scores``.
            code_analysis: Optional result of analyze_code_changes.

        Returns:
            The prompt template with all placeholders substituted.
        """
        # Load user prompt template (short: just overview + data placeholder);
        # falls back to the in-module default when prompts/reflection.md is absent.
        template_path = Path(__file__).parent / "prompts" / "reflection.md"
        if template_path.exists():
            template = template_path.read_text(encoding="utf-8")
        else:
            template = _DEFAULT_REFLECTION_PROMPT

        # Compute statistics
        total_invocations = len(invocations)
        success_rate = (
            sum(1 for i in invocations if i.get("success", True)) / total_invocations * 100
            if total_invocations else 100
        )

        # Period range (human-readable local-time span of the analyzed rows)
        if scores:
            ts_min = min(s.get("created_at", 0) for s in scores)
            ts_max = max(s.get("created_at", 0) for s in scores)
            period_range = (
                f"{time.strftime('%m-%d %H:%M', time.localtime(ts_min))} ~ "
                f"{time.strftime('%m-%d %H:%M', time.localtime(ts_max))}"
            )
        else:
            period_range = "N/A"

        # Build structured data JSON — compact format to save tokens
        data = {}

        # 1. Sessions — compact: [score, completion, efficiency, cost, satisfaction, category]
        data["sessions"] = [
            [
                round(s.get("composite_score", 0), 2),
                round(s.get("completion_rate", 0), 2),
                round(s.get("efficiency_score", 0), 2),
                round(s.get("cost_efficiency", 0), 2),
                round(s.get("satisfaction_proxy", 0), 2),
                s.get("task_category", ""),
            ]
            for s in scores
        ]

        # 2. Tool usage — compact: {tool: [calls, failures, avg_ms]},
        # ordered by total time spent (descending).
        tool_stats: Dict[str, List[int]] = {}
        for inv in invocations:
            tool = inv.get("tool_name", "")
            if tool not in tool_stats:
                tool_stats[tool] = [0, 0, 0]  # calls, failures, total_ms
            tool_stats[tool][0] += 1
            if not inv.get("success", True):
                tool_stats[tool][1] += 1
            tool_stats[tool][2] += inv.get("duration_ms", 0) or 0
        data["tools"] = {
            t: [v[0], v[1], round(v[2] / max(v[0], 1))]
            for t, v in sorted(tool_stats.items(), key=lambda x: x[1][2], reverse=True)
        }

        # 3. Signals — compact: {type: count}
        signal_types = {}
        for s in signals:
            stype = s.get("signal_type", "unknown")
            signal_types[stype] = signal_types.get(stype, 0) + 1
        data["signals"] = signal_types

        # 4. Errors — only non-empty fields
        err_data = {}
        if errors.tool_failures:
            err_data["tool_failures"] = [
                f"{tf.tool_name}:{tf.error_type}x{tf.count}"
                for tf in errors.tool_failures
            ]
        if errors.retry_patterns:
            err_data["retries"] = [
                f"{rp.tool_name}x{rp.attempt_count}"
                for rp in errors.retry_patterns[:5]
            ]
        if errors.incomplete_sessions:
            err_data["incomplete"] = len(errors.incomplete_sessions)
        if errors.user_corrections:
            err_data["corrections"] = errors.user_corrections
        if errors.correction_examples:
            err_data["correction_examples"] = errors.correction_examples[:2]
        if errors.api_error_count:
            err_data["api_errors"] = errors.api_error_count
        if err_data:
            data["errors"] = err_data

        # 5. Waste — only non-empty
        waste_data = {}
        if waste.slowest_tools:
            waste_data["slowest"] = [
                f"{td.tool_name} {round(td.avg_duration_ms)}ms/{td.call_count}calls"
                for td in waste.slowest_tools[:5]
            ]
        if waste.repeated_operations:
            waste_data["repeated"] = [
                f"{ro.description} x{ro.count}"
                for ro in waste.repeated_operations[:3]
            ]
        if waste.inefficient_sessions:
            waste_data["inefficient"] = len(waste.inefficient_sessions)
        if waste_data:
            data["waste"] = waste_data

        # 6. Code changes — flat compact format (capped at 10 commits,
        # 5 files and 150 body chars per commit)
        if code_analysis and code_analysis.commits:
            cc = code_analysis
            commits_data = []
            for c in cc.commits[:10]:
                entry = f"{c.hash_short} {c.subject} +{c.insertions}/-{c.deletions}"
                if c.file_list:
                    entry += f" [{','.join(c.file_list[:5])}]"
                if c.body:
                    entry += f" | {c.body[:150]}"
                commits_data.append(entry)
            data["code_changes"] = {
                "stats": f"{cc.total_commits} commits +{cc.total_insertions}/-{cc.total_deletions} lines {cc.total_files_changed} files",
                "categories": cc.change_categories,
                "areas": cc.areas_touched,
                "commits": commits_data,
            }

        data_json = json.dumps(data, ensure_ascii=False, indent=2)

        # Fill template (plain str.replace — the template is not a format string)
        prompt = template.replace("{period_range}", period_range)
        prompt = prompt.replace("{sessions_count}", str(len(scores)))
        prompt = prompt.replace("{avg_score}", f"{avg_score:.3f}")
        prompt = prompt.replace("{total_invocations}", str(total_invocations))
        prompt = prompt.replace("{success_rate}", f"{success_rate:.1f}")
        prompt = prompt.replace("{data_json}", data_json)

        return prompt
|
||||
|
||||
# ── Model Call ────────────────────────────────────────────────────────
|
||||
|
||||
    def _call_model(self, prompt: str) -> Optional[str]:
        """Call the active model with automatic failover.

        Resolution order:
        1. Primary model (glm-5.1 via zai)
        2. Fallback model (Qwen3.6 via local) — if primary fails
        Health check: when on fallback, probes primary every 30 min
        and switches back when it recovers.

        Args:
            prompt: User-role prompt text; stored on self._current_prompt
                for _call_chat_completions to read.

        Returns:
            Model response text, or None when config is incomplete or both
            primary and fallback calls fail.
        """
        self._current_prompt = prompt

        active_cfg, is_fallback = _get_active_text_config(self.config)
        base_url = active_cfg.get("base_url", "")
        api_key = active_cfg.get("api_key", "")
        model = active_cfg.get("model", "")

        if not base_url or not model:
            logger.warning("Incomplete runtime config: base_url=%s model=%s",
                           bool(base_url), model)
            return None

        result = self._call_chat_completions(base_url, api_key, model)

        # If primary failed, try fallback (only when we were not already
        # running on the fallback config).
        if result is None and not is_fallback:
            fallback = self.config.get("fallback", {})
            if fallback.get("base_url") and fallback.get("model"):
                logger.warning("Primary model failed, trying fallback: %s",
                               fallback.get("model"))
                result = self._call_chat_completions(
                    fallback["base_url"], fallback.get("api_key", ""),
                    fallback["model"],
                )
                if result is not None:
                    # Record the switch so subsequent calls start on fallback.
                    _switch_to_fallback()

        return result
|
||||
|
||||
    def _call_chat_completions(
        self, base_url: str, api_key: str, model: str,
    ) -> Optional[str]:
        """Call OpenAI-compatible /chat/completions endpoint.

        Sends _SYSTEM_PROMPT plus the prompt stored in self._current_prompt.

        Args:
            base_url: API root; "/chat/completions" is appended.
            api_key: Bearer token; header omitted when empty.
            model: Model identifier for the request body.

        Returns:
            First choice's message content on HTTP 200, otherwise None.
            All failures are logged at DEBUG only — the caller (_call_model)
            decides whether to fail over.
        """
        try:
            # Local import — presumably to keep `requests` an optional
            # dependency at module import time; confirm.
            import requests
            url = f"{base_url.rstrip('/')}/chat/completions"
            headers = {"Content-Type": "application/json"}
            if api_key:
                headers["Authorization"] = f"Bearer {api_key}"

            resp = requests.post(
                url,
                headers=headers,
                json={
                    "model": model,
                    "messages": [
                        {"role": "system", "content": _SYSTEM_PROMPT},
                        {"role": "user", "content": self._current_prompt or ""},
                    ],
                    "temperature": 0.3,
                },
                timeout=300,  # reflection calls can be slow; allow up to 5 min
            )
            if resp.status_code == 200:
                data = resp.json()
                return data.get("choices", [{}])[0].get("message", {}).get("content", "")
            else:
                logger.debug("Model call failed: %d %s", resp.status_code, resp.text[:200])
        except Exception as exc:
            logger.debug("Chat completions call failed: %s", exc)
        return None
|
||||
|
||||
# ── Multimodal Call ───────────────────────────────────────────────────
|
||||
|
||||
    def call_multimodal(self, prompt: str, images: list = None) -> Optional[str]:
        """Call multimodal model with text and optional images.

        Routes to local multimodal model (gemma-4-26b-a4b-it-4bit) when
        images are involved. Falls back to text model if no images.

        Args:
            prompt: Text prompt.
            images: List of image data, each item is either:
                - URL string (http/https/data:image)
                - bytes (raw image data, auto-encoded to base64)

        Returns:
            Model response text, or None on failure.
        """
        mm = self.config.get("multimodal", {})
        if not mm or not mm.get("base_url"):
            logger.debug("No multimodal model configured, falling back to text")
            return self._call_model(prompt)

        # Build OpenAI-style multi-part content: one text part plus one
        # image_url part per image.  Items of unexpected type are skipped.
        content = [{"type": "text", "text": prompt}]
        for img in (images or []):
            if isinstance(img, bytes):
                import base64
                b64 = base64.b64encode(img).decode()
                content.append({
                    "type": "image_url",
                    "image_url": {"url": f"data:image/png;base64,{b64}"},
                })
            elif isinstance(img, str):
                content.append({
                    "type": "image_url",
                    "image_url": {"url": img},
                })

        try:
            from openai import OpenAI
            # Normalize base_url: append /v1 unless already present.
            client = OpenAI(
                base_url=mm["base_url"].rstrip("/") + ("/v1" if not mm["base_url"].rstrip("/").endswith("/v1") else ""),
                api_key=mm.get("api_key") or "no-key",
            )
            resp = client.chat.completions.create(
                model=mm["model"],
                messages=[{"role": "user", "content": content}],
                temperature=0.3,
                max_tokens=2000,
                timeout=120,
            )
            return resp.choices[0].message.content
        except Exception as exc:
            logger.debug("Multimodal call failed: %s", exc)
            return None
|
||||
|
||||
# ── Reflection Parsing ────────────────────────────────────────────────
|
||||
|
||||
    def _parse_reflection(
        self,
        reflection_text: str,
        period_start: float,
        period_end: float,
        sessions_analyzed: int,
        avg_score: float,
        error_analysis: ErrorAnalysis,
        waste_analysis: WasteAnalysis,
        code_analysis: CodeChangeAnalysis = None,
    ) -> ReflectionReport:
        """Parse model output into structured ReflectionReport.

        Extraction cascade:
        1. Direct JSON parse
        2. Strip markdown ```json ... ``` wrapper, retry JSON
        3. Extract JSON object via regex (handle trailing text)
        4. Text-based section extraction (fallback)

        Args:
            reflection_text: Raw model output.
            period_start / period_end: Unix timestamps bounding the period.
            sessions_analyzed: Number of session_scores rows analyzed.
            avg_score: Mean composite score for the period.
            error_analysis / waste_analysis: Analyses summarized into the report.
            code_analysis: Optional code-change analysis summarized into the report.

        Returns:
            ReflectionReport; pattern/recommendation lists are empty when no
            extraction step succeeds.
        """
        worst_patterns = []
        best_patterns = []
        recommendations = []
        tool_insights = {}

        text = reflection_text.strip()

        # 1. Direct JSON parse
        data = _try_parse_json(text)

        if data is None:
            # 2. Strip markdown wrapper
            m = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', text, re.DOTALL)
            if m:
                data = _try_parse_json(m.group(1))

        if data is None:
            # 3. Regex extract first JSON object (only matches a flat object
            # containing one of the expected keys)
            m = re.search(r'\{[^{}]*"(?:worst|best|recommendations)"[^{}]*\}', text, re.DOTALL)
            if m:
                data = _try_parse_json(m.group(0))

        if data is None:
            # 3.5. Broader fallback — take everything between the outermost braces
            start = text.find('{')
            end = text.rfind('}')
            if start != -1 and end > start:
                data = _try_parse_json(text[start:end + 1])

        if data is not None:
            worst_patterns = data.get("worst_patterns") or []
            best_patterns = data.get("best_patterns") or []
            recommendations = data.get("recommendations") or []
            tool_insights = data.get("tool_insights") or {}
        else:
            # 4. Text-based extraction: track the current section from
            # heading keywords (English or Chinese), then collect bulleted
            # ("- ", "* ", "• ") and numbered ("1. " / "1) ") items into it.
            section = None
            for line in text.split("\n"):
                stripped = line.strip()
                lower = stripped.lower()
                if ("worst" in lower and "pattern" in lower) or "最差" in stripped or "错误模式" in stripped:
                    section = "worst"
                elif ("best" in lower and "pattern" in lower) or "最佳" in stripped or "成功" in stripped:
                    section = "best"
                elif ("recommend" in lower) or "建议" in stripped:
                    section = "rec"
                elif stripped.startswith("- ") or stripped.startswith("* ") or stripped.startswith("• "):
                    item = stripped.lstrip("-*• ").strip()
                    if section == "worst":
                        worst_patterns.append(item)
                    elif section == "best":
                        best_patterns.append(item)
                    elif section == "rec":
                        recommendations.append(item)
                elif len(stripped) > 2 and stripped[0].isdigit() and stripped[1] in ".)" and stripped[2] == " ":
                    # Numbered item, e.g. "1. foo" or "2) bar"
                    item = stripped[3:].strip()
                    if section == "worst":
                        worst_patterns.append(item)
                    elif section == "best":
                        best_patterns.append(item)
                    elif section == "rec":
                        recommendations.append(item)

        return ReflectionReport(
            period_start=period_start,
            period_end=period_end,
            sessions_analyzed=sessions_analyzed,
            avg_score=avg_score,
            error_summary=error_analysis.summary(),
            waste_summary=waste_analysis.summary(),
            worst_patterns=worst_patterns,
            best_patterns=best_patterns,
            tool_insights=tool_insights,
            recommendations=recommendations,
            code_change_summary=code_analysis.summary() if code_analysis else "",
            model_used=self.config.get("model", "unknown"),
        )
|
||||
|
||||
|
||||
# ── Default Prompt Template ──────────────────────────────────────────────

# System prompt (Chinese, runtime string — do not translate).  It instructs
# the model to act as the Hermes Agent performance-analysis engine and emit
# strict JSON (no markdown) with keys: worst_patterns, best_patterns,
# tool_insights, recommendations; at most 5 recommendations, empty arrays
# when there is no data.
_SYSTEM_PROMPT = (
    "你是 Hermes Agent 性能分析引擎。分析运行数据+代码变更,输出严格JSON(无markdown)。\n"
    "格式:\n"
    '{"worst_patterns":["模式(工具+场景+根因)"],"best_patterns":["成功经验"],'
    '"tool_insights":{"工具":{"sr":0.95,"ms":500,"rec":"建议"}},'
    '"recommendations":["做什么|效果|风险(l/m/h)|验证"]}\n'
    "重点:系统性错误>偶发,错误连锁,策略vs工具问题,重复操作,代码设计合理性,自我进化状态,"
    "可固化流程。≤5条建议,优先高影响低风险。无数据时输出空数组。"
)
|
||||
|
||||
|
||||
_DEFAULT_REFLECTION_PROMPT = """## 概况
|
||||
- 时段: {period_range}
|
||||
- Session 数: {sessions_count}, 平均质量: {avg_score}
|
||||
- 工具调用: {total_invocations} 次, 成功率 {success_rate}%
|
||||
|
||||
## 数据
|
||||
{data_json}
|
||||
"""
|
||||
|
||||
|
||||
def _try_parse_json(text: str) -> Optional[dict]:
|
||||
"""Try to parse JSON, returning None on any failure."""
|
||||
try:
|
||||
data = json.loads(text)
|
||||
if isinstance(data, dict):
|
||||
return data
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
pass
|
||||
return None
|
||||
101
self_evolution/rule_engine.py
Normal file
101
self_evolution/rule_engine.py
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
"""
|
||||
Self Evolution Plugin — Rule Engine (Strategy Matching)
|
||||
========================================================
|
||||
|
||||
Conditional strategy matching engine.
|
||||
|
||||
Design reference: Claude Code plugins/hookify/core/rule_engine.py
|
||||
- LRU-cached regex compilation (max 128)
|
||||
- Multiple operators: regex_match, contains, equals, not_contains
|
||||
- All conditions must match (AND logic)
|
||||
- Severity levels: high, medium, low
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from functools import lru_cache
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from self_evolution.models import StrategyRule, StrategyCondition
|
||||
|
||||
|
||||
@lru_cache(maxsize=128)
|
||||
def _compile_pattern(pattern: str) -> re.Pattern:
|
||||
"""Compile and cache a regex pattern."""
|
||||
return re.compile(pattern, re.IGNORECASE)
|
||||
|
||||
|
||||
class StrategyRuleEngine:
    """Evaluate strategy rules against session context."""

    def match_strategies(
        self,
        strategies: List[StrategyRule],
        context: Dict[str, Any],
    ) -> List[StrategyRule]:
        """Return the enabled strategies whose conditions hold for *context*.

        A strategy without conditions matches unconditionally; otherwise
        every condition must hold (AND semantics — see _conditions_match).
        Disabled strategies are skipped.
        """
        hits: List[StrategyRule] = []
        for rule in strategies:
            if not rule.enabled:
                continue
            if not rule.conditions or self._conditions_match(rule.conditions, context):
                hits.append(rule)
        return hits
|
||||
|
||||
def _conditions_match(
|
||||
self,
|
||||
conditions: List[StrategyCondition],
|
||||
context: Dict[str, Any],
|
||||
) -> bool:
|
||||
"""All conditions must match (AND logic)."""
|
||||
for cond in conditions:
|
||||
field_value = str(context.get(cond.field, ""))
|
||||
if not self._check_operator(cond.operator, cond.pattern, field_value):
|
||||
return False
|
||||
return True
|
||||
|
||||
def _check_operator(self, op: str, pattern: str, value: str) -> bool:
|
||||
"""Apply operator check."""
|
||||
try:
|
||||
if op == "regex_match":
|
||||
return bool(_compile_pattern(pattern).search(value))
|
||||
elif op == "contains":
|
||||
return pattern in value
|
||||
elif op == "equals":
|
||||
return pattern == value
|
||||
elif op == "not_contains":
|
||||
return pattern not in value
|
||||
elif op == "starts_with":
|
||||
return value.startswith(pattern)
|
||||
elif op == "ends_with":
|
||||
return value.endswith(pattern)
|
||||
else:
|
||||
return False
|
||||
except re.error:
|
||||
return False
|
||||
|
||||
def format_hints(self, strategies: List[StrategyRule], max_chars: int = 0) -> str:
|
||||
"""Format matched strategies into a system hint string.
|
||||
|
||||
Args:
|
||||
max_chars: If > 0, truncate total output to this many characters.
|
||||
"""
|
||||
if not strategies:
|
||||
return ""
|
||||
|
||||
lines = ["[自我进化策略提示]"]
|
||||
for s in strategies:
|
||||
type_prefix = {"hint": "💡", "avoid": "⚠️", "prefer": "✅"}.get(
|
||||
s.strategy_type, "💡"
|
||||
)
|
||||
line = f"{type_prefix} {s.name}: {s.hint_text}"
|
||||
if max_chars and len("\n".join(lines)) + len(line) > max_chars:
|
||||
break
|
||||
lines.append(line)
|
||||
|
||||
return "\n".join(lines)
|
||||
141
self_evolution/strategy_compressor.py
Normal file
141
self_evolution/strategy_compressor.py
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
"""
|
||||
Self Evolution Plugin — Strategy Compressor
|
||||
=============================================
|
||||
|
||||
Compresses and merges redundant strategy rules into concise hints.
|
||||
|
||||
Called after dream consolidation to keep strategies.json compact.
|
||||
Each hint_text must be ≤ 30 chars; strategies without conditions are
|
||||
either merged into conditional rules or discarded.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
from typing import Any, Dict, List
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Maximum allowed length for hint_text (characters)
MAX_HINT_LENGTH = 30

# Keyword clusters used to group similar strategies.
# Each cluster carries:
#   keywords  — terms used by _cluster_rules to assign rules to the cluster
#               (exact matching logic lives in _cluster_rules)
#   hint      — the merged hint text (Chinese, runtime data — keep as-is)
#   condition — the matching condition attached to the merged rule;
#               an empty dict means the merged rule has no condition
_CLUSTERS: List[Dict[str, Any]] = [
    {
        "keywords": ["bash", "路径", "path", "校验", "预检", "验证", "存在"],
        "hint": "bash前先read验证路径",
        "condition": {"field": "tool_name", "operator": "contains", "pattern": "bash"},
    },
    {
        "keywords": ["api", "调试", "debug", "降级", "只读", "探查"],
        "hint": "API失败时降级只读探查",
        "condition": {"field": "task_type", "operator": "contains", "pattern": "api"},
    },
    {
        "keywords": ["browser", "浏览器", "timeout", "超时", "网页"],
        "hint": "浏览器操作设置超时保护",
        "condition": {"field": "tool_name", "operator": "contains", "pattern": "browser"},
    },
    {
        "keywords": ["重试", "retry", "浪费", "重复", "循环"],
        "hint": "避免重复重试相同操作",
        "condition": {},
    },
]
|
||||
|
||||
|
||||
def compress_strategies(rules: List[dict]) -> List[dict]:
    """Compress strategy rules by merging similar ones.

    Returns a new list of rules with:
    - Duplicate hint_texts removed
    - Similar rules merged into cluster summaries
    - hint_text truncated to MAX_HINT_LENGTH
    - Non-matching rules dropped if they have no conditions

    The input ``rules`` list and its dicts are never mutated; truncation is
    applied to shallow copies so callers keep their original data.
    """
    if not rules:
        return []

    # Deduplicate by normalized hint_text, preserving first occurrence.
    seen_hints: set[str] = set()
    unique: list[dict] = []
    for r in rules:
        key = r.get("hint_text", "").strip().lower()
        if key and key not in seen_hints:
            seen_hints.add(key)
            unique.append(r)

    # Cluster similar rules
    clustered = _cluster_rules(unique)

    # Enforce constraints: hint_text ≤ MAX_HINT_LENGTH chars, must have
    # conditions.  Track inclusion by object identity so the manual/default
    # pass below does not re-add rules that already went through clustering.
    included_ids: set[int] = set()
    result: list[dict] = []
    for r in clustered:
        hint = r.get("hint_text", "").strip()
        conditions = r.get("conditions", [])

        # Skip rules without conditions (they won't be injected anyway)
        if not conditions:
            logger.debug("Dropping unconditioned strategy: %s", hint[:40])
            continue

        out = dict(r)  # copy so the caller's dicts are not mutated
        if len(hint) > MAX_HINT_LENGTH:
            out["hint_text"] = hint[:MAX_HINT_LENGTH]
        included_ids.add(id(r))
        result.append(out)

    # Also keep any manual/default rules that already have conditions but
    # were absorbed into a cluster summary above.
    for r in unique:
        if r.get("source") in ("manual", "default") and r.get("conditions"):
            if id(r) not in included_ids:
                out = dict(r)
                hint = out.get("hint_text", "").strip()
                if len(hint) > MAX_HINT_LENGTH:
                    out["hint_text"] = hint[:MAX_HINT_LENGTH]
                included_ids.add(id(r))
                result.append(out)

    logger.info("Compressed strategies: %d → %d rules", len(rules), len(result))
    return result
|
||||
|
||||
|
||||
def _cluster_rules(rules: list[dict]) -> list[dict]:
    """Group rules by keyword clusters and merge each group into one rule.

    A rule belongs to a cluster when any cluster keyword appears in its
    name or hint_text (case-insensitive).  Each non-empty group collapses
    into a single "learned" rule carrying the cluster's canonical hint.
    Rules matching no cluster pass through unchanged.
    """
    matched_indices: set[int] = set()
    merged: list[dict] = []

    for cluster in _CLUSTERS:
        group: list[dict] = []
        for i, r in enumerate(rules):
            text = f"{r.get('name', '')} {r.get('hint_text', '')}".lower()
            if any(kw in text for kw in cluster["keywords"]):
                group.append(r)
                matched_indices.add(i)

        if not group:
            continue

        # Merge group into one rule.  Copy the template condition so later
        # mutation of a merged rule's conditions cannot corrupt the
        # module-level _CLUSTERS template shared by all future calls.
        first = group[0]
        condition = cluster.get("condition")
        merged_rule = {
            "id": first.get("id", ""),
            "name": cluster["hint"],
            "type": "learned",
            "description": cluster["hint"],
            "hint_text": cluster["hint"],
            "conditions": [dict(condition)] if condition else [],
            "severity": "medium",
            "enabled": True,
            "source": "learned",
            "created_at": first.get("created_at", 0),
        }
        merged.append(merged_rule)

    # Add unmatched rules as-is
    for i, r in enumerate(rules):
        if i not in matched_indices:
            merged.append(r)

    return merged
|
||||
124
self_evolution/strategy_injector.py
Normal file
124
self_evolution/strategy_injector.py
Normal file
|
|
@ -0,0 +1,124 @@
|
|||
"""
|
||||
Self Evolution Plugin — Strategy Injector
|
||||
===========================================
|
||||
|
||||
Injects learned strategy hints into sessions via pre_llm_call hook.
|
||||
|
||||
Design reference: Claude Code plugins/learning-output-style/
|
||||
- SessionStart hook injects behavioral context automatically
|
||||
- Equivalent to CLAUDE.md but more flexible and distributable
|
||||
- No core modification needed
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import time
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from self_evolution.models import StrategyRule
|
||||
from self_evolution.rule_engine import StrategyRuleEngine
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_engine = StrategyRuleEngine()
|
||||
|
||||
# ── TTL-based cache to avoid reading strategies.json on every LLM call ────
|
||||
|
||||
_cached_strategies: list | None = None
|
||||
_cache_ts: float = 0.0
|
||||
_CACHE_TTL: float = 60.0 # seconds
|
||||
|
||||
|
||||
def _load_active_strategies() -> list:
    """Return enabled StrategyRule objects, reloading at most once per TTL.

    The parsed list is memoized in module globals; a load is triggered only
    when the cache is cold (None) or older than _CACHE_TTL seconds.
    """
    global _cached_strategies, _cache_ts

    now = time.time()
    cache_is_fresh = (
        _cached_strategies is not None and (now - _cache_ts) < _CACHE_TTL
    )
    if cache_is_fresh:
        return _cached_strategies

    # Imported lazily so module import does not pull in the store eagerly.
    from self_evolution.strategy_store import StrategyStore

    raw_rules = StrategyStore().load().get("rules", [])
    fresh = [
        StrategyRule.from_dict(rule_data)
        for rule_data in raw_rules
        if rule_data.get("enabled", True)
    ]

    _cached_strategies = fresh
    _cache_ts = now
    return fresh
|
||||
|
||||
|
||||
def invalidate_cache():
    """Drop the cached strategy list so the next load re-reads the store."""
    global _cached_strategies
    _cached_strategies = None
|
||||
|
||||
|
||||
_MAX_INJECT_STRATEGIES = 3  # maximum number of strategies injected per call
_MAX_HINT_CHARS = 100  # total character budget for injected hint text
_MAX_SINGLE_HINT = 30  # maximum characters allowed for one hint_text
|
||||
|
||||
def inject_hints(kwargs: dict) -> Optional[str]:
    """Pre-llm-call hook: inject learned strategy hints.

    Rules:
    - Strategies without conditions are skipped (must be condition-based).
    - hint_text longer than _MAX_SINGLE_HINT chars are skipped.
    - At most _MAX_INJECT_STRATEGIES hints, total ≤ _MAX_HINT_CHARS.

    Returns the formatted hint string, or None when nothing qualifies.
    """
    strategies = _load_active_strategies()
    if not strategies:
        return None

    # Match strategies against the current session context.
    matched = _engine.match_strategies(strategies, _build_context(kwargs))
    if not matched:
        return None

    # Single pass: keep condition-based strategies whose hint fits the size
    # limit, dedup by normalized hint text, and stop at the injection cap.
    picked: list = []
    seen_keys: set[str] = set()
    for strategy in matched:
        if not strategy.conditions:
            continue  # must be condition-based
        hint = strategy.hint_text.strip()
        if len(hint) > _MAX_SINGLE_HINT:
            continue  # overly long hints are never injected
        normalized = hint.lower()
        if normalized in seen_keys:
            continue  # duplicate hint content
        seen_keys.add(normalized)
        picked.append(strategy)
        if len(picked) >= _MAX_INJECT_STRATEGIES:
            break

    if not picked:
        return None

    # Format hints within the total character budget.
    return _engine.format_hints(picked, max_chars=_MAX_HINT_CHARS)
|
||||
|
||||
|
||||
def _build_context(kwargs: dict) -> dict:
|
||||
"""Build matching context from hook kwargs."""
|
||||
return {
|
||||
"platform": kwargs.get("platform", ""),
|
||||
"model": kwargs.get("model", ""),
|
||||
"task_type": kwargs.get("task_type", ""),
|
||||
"tool_name": kwargs.get("tool_name", ""),
|
||||
}
|
||||
72
self_evolution/strategy_store.py
Normal file
72
self_evolution/strategy_store.py
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
"""
|
||||
Self Evolution Plugin — Strategy Store
|
||||
========================================
|
||||
|
||||
Manages strategy rules with version history and rollback support.
|
||||
|
||||
Strategies stored at ~/.hermes/self_evolution/strategies.json
|
||||
Archives at ~/.hermes/self_evolution/archive/strategies_v{N}.json
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from self_evolution.paths import DATA_DIR as STRATEGIES_DIR, STRATEGIES_FILE, ARCHIVE_DIR
|
||||
|
||||
|
||||
class StrategyStore:
    """Load, save, and version strategy rules.

    The store is a single JSON document ({"version": int, "rules": [...]})
    at STRATEGIES_FILE, with per-version snapshots under ARCHIVE_DIR for
    rollback.
    """

    def load(self) -> dict:
        """Load current strategies.

        Returns an empty version-0 document when the file is missing,
        unreadable, or contains invalid JSON (best-effort, never raises).
        """
        if not STRATEGIES_FILE.exists():
            return {"version": 0, "rules": []}
        try:
            return json.loads(STRATEGIES_FILE.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, OSError):
            return {"version": 0, "rules": []}

    def save(self, data: dict):
        """Save strategies to file atomically.

        Writes to a temporary sibling file first and then renames it over
        STRATEGIES_FILE, so a crash mid-write cannot leave a truncated
        document behind (which load() would silently reset to empty,
        losing all learned strategies).
        """
        STRATEGIES_DIR.mkdir(parents=True, exist_ok=True)
        tmp_path = STRATEGIES_FILE.with_name(STRATEGIES_FILE.name + ".tmp")
        tmp_path.write_text(
            json.dumps(data, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )
        # Path.replace is an atomic rename on POSIX when src/dst share a
        # filesystem, which they do here (same directory).
        tmp_path.replace(STRATEGIES_FILE)

    def archive(self, version: int):
        """Archive current strategies for rollback.

        No-op when there is nothing to archive yet.
        """
        if not STRATEGIES_FILE.exists():
            return
        ARCHIVE_DIR.mkdir(parents=True, exist_ok=True)
        archive_path = ARCHIVE_DIR / f"strategies_v{version}.json"
        archive_path.write_text(
            STRATEGIES_FILE.read_text(encoding="utf-8"),
            encoding="utf-8",
        )
        logger.info("Archived strategies version %d", version)

    def load_archive(self, version: int) -> Optional[dict]:
        """Load an archived version; None when missing or unreadable."""
        archive_path = ARCHIVE_DIR / f"strategies_v{version}.json"
        if not archive_path.exists():
            return None
        try:
            return json.loads(archive_path.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, OSError):
            return None

    def restore(self, data: dict):
        """Restore strategies from an archive (overwrites current file)."""
        self.save(data)
        logger.info("Restored strategies from archive")

    def get_version(self) -> int:
        """Get current version number (0 when the store is empty/missing)."""
        return self.load().get("version", 0)
|
||||
1253
tests/test_self_evolution.py
Normal file
1253
tests/test_self_evolution.py
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue