hermes-agent/self_evolution/git_analyzer.py
玉冰 3cd384dc43 feat: add self-evolution plugin — agent self-optimization system
Add a comprehensive self-evolution system that enables Hermes Agent
to continuously improve through automated analysis and optimization:

Core components:
- reflection_engine: Nightly session analysis (1:00 AM)
- evolution_proposer: Generate improvement proposals from insights
- quality_scorer: Multi-dimensional session quality evaluation
- strategy_injector: Inject learned strategies into new sessions
- strategy_compressor: Strategy optimization and deduplication
- git_analyzer: Code change pattern analysis
- rule_engine: Pattern-based rule generation
- feishu_notifier: Feishu card notifications for evolution events

Storage:
- db.py: SQLite telemetry storage
- strategy_store: Persistent strategy storage
- models.py: Data models

Plugin integration:
- plugin.yaml, hooks.py, __init__.py for plugin system
- cron_jobs.py for scheduled tasks

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-25 00:40:13 +08:00

170 lines
5.5 KiB
Python

"""
Self Evolution Plugin — Git Analysis
=====================================
Analyzes git commit history for the dream consolidation engine.
Uses a single batched ``git log --shortstat --name-only`` call instead of
25+ individual subprocess invocations.
Extracted from reflection_engine.py for single-responsibility.
"""
from __future__ import annotations
import logging
import re
import subprocess
import time
from pathlib import Path
from typing import Dict
from self_evolution.models import CodeChangeAnalysis, CommitInfo
logger = logging.getLogger(__name__)
def analyze_code_changes(hours: int = 24) -> CodeChangeAnalysis:
    """Analyze recent git commits and aggregate them into one summary.

    Runs a single batched ``git log --shortstat --name-only`` (merge commits
    excluded, at most 15 commits, newer than ``hours`` ago) in the parent of
    this file's directory, parses the output with ``_parse_batched_output``
    and aggregates per-commit data.

    Args:
        hours: Size of the look-back window, in hours.

    Returns:
        A ``CodeChangeAnalysis`` holding the parsed commits, summed
        insertions/deletions/files-changed, de-duplicated authors (first-seen
        order), a count per commit category, and up to 10 distinct top-level
        directories touched. An empty ``CodeChangeAnalysis()`` is returned
        when git cannot be run, times out, exits non-zero, prints nothing, or
        no commit could be parsed.
    """
    project_root = str(Path(__file__).resolve().parent.parent)
    cutoff_epoch = time.time() - (hours * 3600)
    cutoff_date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(cutoff_epoch))

    try:
        # NOTE(review): git may drop --shortstat when --name-only is also
        # given, which would leave insertions/deletions at 0 — confirm against
        # real output; --numstat would carry both counts and paths.
        result = subprocess.run(
            ["git", "log",
             "--format=COMMITSTART%h%n%s%n%an%n%at%n%b%nENDHEADER",
             "--shortstat", "--name-only",
             "--no-merges", f"--since={cutoff_date}", "-15"],
            capture_output=True,
            text=True,
            # Decode explicitly as UTF-8 and never fail on odd bytes: with the
            # locale default, a non-ASCII commit message can raise
            # UnicodeDecodeError, which the handler below does not catch.
            encoding="utf-8",
            errors="replace",
            timeout=30,
            cwd=project_root,
        )
    except (subprocess.SubprocessError, FileNotFoundError, OSError):
        # Best effort: a missing git binary, a timeout or an unusable working
        # directory simply means "no analysis available".
        logger.debug("git analysis unavailable", exc_info=True)
        return CodeChangeAnalysis()

    if result.returncode != 0 or not result.stdout.strip():
        return CodeChangeAnalysis()

    commits = _parse_batched_output(result.stdout)
    if not commits:
        return CodeChangeAnalysis()

    # Aggregate stats
    total_ins = sum(c.insertions for c in commits)
    total_del = sum(c.deletions for c in commits)
    total_files = sum(c.files_changed for c in commits)
    # dict.fromkeys de-duplicates while keeping first-seen order.
    authors = list(dict.fromkeys(c.author for c in commits))

    # Categorize by conventional commit prefix
    categories: Dict[str, int] = {}
    for c in commits:
        cat = _categorize_commit(c.subject)
        categories[cat] = categories.get(cat, 0) + 1

    # Extract top-level module areas: first path component of every nested,
    # non-dot path, de-duplicated in first-seen order and capped at 10.
    all_files = [path for c in commits for path in c.file_list]
    areas = list(dict.fromkeys(
        f.split("/")[0] for f in all_files
        if "/" in f and not f.startswith(".")
    ))[:10]

    return CodeChangeAnalysis(
        commits=commits,
        total_commits=len(commits),
        total_insertions=total_ins,
        total_deletions=total_del,
        total_files_changed=total_files,
        authors=authors,
        change_categories=categories,
        areas_touched=areas,
    )
def _parse_batched_output(stdout: str) -> list:
    """Parse batched ``git log`` output into ``CommitInfo`` objects.

    Each commit is expected as a chunk that starts with ``COMMITSTART``
    followed by, one per line: short hash, subject, author, epoch timestamp,
    then the (possibly multi-line) body and a line reading ``ENDHEADER``.
    Whatever follows ``ENDHEADER`` is treated as an optional shortstat line
    plus one changed path per line.

    Args:
        stdout: Raw text printed by the batched ``git log`` call.

    Returns:
        A list of ``CommitInfo``, in output order. Chunks without an
        ``ENDHEADER`` line, with fewer than four header lines, or with a
        non-numeric timestamp are skipped. The body is capped at 500
        characters and the file list at 20 entries.
    """
    # A shortstat line always begins with "<n> file(s) changed"; anchoring on
    # that keeps paths such as "algos/insertion_sort.py" from being mistaken
    # for the stat line.
    shortstat_re = re.compile(r"^\d+ files? changed\b")

    commits = []
    # Both markers are only honoured at the start of a line, so a commit
    # message that merely mentions "COMMITSTART" or "ENDHEADER" mid-sentence
    # no longer breaks the chunking.
    for chunk in re.split(r"^COMMITSTART", stdout, flags=re.MULTILINE):
        marker = re.search(r"^ENDHEADER[ \t]*$", chunk, flags=re.MULTILINE)
        if marker is None:
            continue

        header_lines = chunk[:marker.start()].strip().split("\n")
        if len(header_lines) < 4:
            continue

        hash_short = header_lines[0].strip()
        subject = header_lines[1].strip()
        author = header_lines[2].strip()
        try:
            timestamp = float(header_lines[3].strip())
        except ValueError:
            continue
        body = "\n".join(header_lines[4:]).strip()[:500]

        # After ENDHEADER: shortstat line (if any) + file list, in any order.
        files_changed = 0
        insertions = 0
        deletions = 0
        paths = []
        stats_seen = False
        for line in chunk[marker.end():].split("\n"):
            line = line.strip()
            if not line:
                continue
            if shortstat_re.match(line):
                # Only the first stat line counts; later ones are ignored.
                if not stats_seen:
                    files_changed = _parse_int(r'(\d+) files? changed', line)
                    insertions = _parse_int(r'(\d+) insertion', line)
                    deletions = _parse_int(r'(\d+) deletion', line)
                    stats_seen = True
                continue
            # Every other non-blank line is a path; no "/"-or-"." filter, so
            # root-level files without an extension (Makefile, LICENSE) are
            # kept as well.
            paths.append(line)

        if not stats_seen:
            # No shortstat line in the output: fall back to counting the
            # listed paths rather than reporting 0 files changed.
            files_changed = len(paths)

        commits.append(CommitInfo(
            hash_short=hash_short,
            subject=subject,
            body=body,
            author=author,
            timestamp=timestamp,
            files_changed=files_changed,
            insertions=insertions,
            deletions=deletions,
            file_list=paths[:20],
        ))
    return commits
# ── Helpers ───────────────────────────────────────────────────────────────
def _parse_int(pattern: str, text: str) -> int:
"""Extract first integer matching regex pattern from text."""
m = re.search(pattern, text)
return int(m.group(1)) if m else 0
def _categorize_commit(subject: str) -> str:
"""Categorize commit by conventional commit prefix."""
s = subject.lower()
for prefix in ("feat", "fix", "refactor", "test", "docs", "chore", "perf", "style", "ci", "build"):
if s.startswith(prefix):
return prefix
return "other"