hermes-agent/code_stats.py
玉冰 b937a65d83 feat: add utility scripts for code stats and Claude ACP bridge
- code_stats.py: Repository code statistics analyzer
- count_lines.py: Line counting utility
- scripts/claude_acp_bridge.py: Bridge script for Claude Agent Communication Protocol

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-25 00:39:35 +08:00

217 lines
7.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""代码统计分析工具 - 按目录/类型分组,区分代码行/注释行/空行ASCII柱状图"""
import os
import sys
from collections import defaultdict
from pathlib import Path
from typing import Optional
# Comment markers per file extension:
#   extension -> (line-comment prefix, block-comment opener, block-comment closer)
# A None slot means no marker of that kind is configured for the extension.
_SLASH_STYLE = ("//", "/*", "*/")   # shared by the C-like entries below
_HASH_ONLY = ("#", None, None)      # line comments only, no block form

LANG_MAP = {
    ".py": ("#", '"""', '"""'),
    ".js": _SLASH_STYLE,
    ".ts": _SLASH_STYLE,
    ".tsx": _SLASH_STYLE,
    ".jsx": _SLASH_STYLE,
    ".java": _SLASH_STYLE,
    ".c": _SLASH_STYLE,
    ".cpp": _SLASH_STYLE,
    ".h": _SLASH_STYLE,
    ".go": _SLASH_STYLE,
    ".rs": _SLASH_STYLE,
    ".rb": ("#", "=begin", "=end"),
    ".sh": _HASH_ONLY,
    ".bash": _HASH_ONLY,
    ".yml": _HASH_ONLY,
    ".yaml": _HASH_ONLY,
    ".toml": _HASH_ONLY,
    ".sql": ("--", "/*", "*/"),
    ".html": (None, "<!--", "-->"),
    ".css": (None, "/*", "*/"),
    ".vue": _SLASH_STYLE,
    ".swift": _SLASH_STYLE,
    ".kt": _SLASH_STYLE,
    ".lua": ("--", "--[[", "]]"),
    ".r": _HASH_ONLY,
    ".php": _SLASH_STYLE,
}
# Path component names that cause a file to be excluded from the statistics
# (matched against every component of a path by should_skip).
SKIP_DIRS = {
    # version-control metadata
    ".git",
    ".svn",
    ".hg",
    # installed dependencies and virtual environments
    "node_modules",
    ".venv",
    "venv",
    "env",
    ".env",
    "vendor",
    # caches and tool working directories
    "__pycache__",
    ".tox",
    ".mypy_cache",
    ".pytest_cache",
    ".eggs",
    # build and report output
    "dist",
    "build",
    "target",
    ".next",
    ".nuxt",
    "coverage",
}
def should_skip(path: Path) -> bool:
    """Tell whether *path* has at least one component listed in SKIP_DIRS.

    Every element of ``path.parts`` is considered, including the final one.
    """
    # "Some component is in the set" is the same as "the two are not disjoint".
    return not SKIP_DIRS.isdisjoint(path.parts)
def analyze_file(filepath: Path) -> Optional[dict]:
    """Classify every line of one source file as code, comment or blank.

    The comment markers are looked up in LANG_MAP by the lower-cased file
    suffix.  Classification is a line-prefix heuristic:

    * a line that is empty after stripping whitespace is *blank*;
    * a line that starts with the block-comment opener, or lies inside an
      open block comment, is a *comment* (blank lines inside a block are
      still counted as blank);
    * a line that starts with the line-comment prefix is a *comment*;
    * everything else is *code* (so a trailing comment after code is code).

    Args:
        filepath: Path of the file to read.

    Returns:
        ``{"ext", "total", "code", "comment", "blank"}`` for a supported,
        readable file; ``None`` when the suffix is not in LANG_MAP or the
        file cannot be opened/read.
    """
    ext = filepath.suffix.lower()
    markers = LANG_MAP.get(ext)
    if markers is None:
        return None
    line_comment, block_start, block_end = markers

    total = 0
    blank = 0
    comment = 0
    in_block = False
    try:
        # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading BOM;
        # str.strip() does not remove U+FEFF, so with plain "utf-8" a comment
        # on the first line of a BOM-prefixed file was miscounted as code.
        with open(filepath, "r", encoding="utf-8-sig", errors="ignore") as f:
            # Stream the file instead of materialising every line up front.
            for line in f:
                total += 1
                stripped = line.strip()
                if not stripped:
                    blank += 1
                    continue
                if in_block:
                    comment += 1
                    if block_end and block_end in stripped:
                        in_block = False
                    continue
                if block_start and stripped.startswith(block_start):
                    comment += 1
                    # Look for the closer only after the opener, so markers
                    # that are identical (e.g. triple quotes) do not close
                    # the block on the very token that opened it.
                    if block_end and block_end not in stripped[len(block_start):]:
                        in_block = True
                    continue
                if line_comment and stripped.startswith(line_comment):
                    comment += 1
    except OSError:
        # Unreadable files (PermissionError is an OSError subclass) are
        # skipped on purpose: the tool is best-effort over a whole tree.
        return None

    return {
        "ext": ext,
        "total": total,
        "code": total - blank - comment,
        "comment": comment,
        "blank": blank,
    }
def bar(value, max_val, width=25):
    """Render *value* as a fixed-width horizontal bar scaled against *max_val*.

    Args:
        value: Quantity to draw.
        max_val: Quantity that corresponds to a completely filled bar.
        width: Total bar length in characters.

    Returns:
        A string of ``width`` characters: full blocks for the filled part
        followed by light-shade blocks for the remainder.  Returns an empty
        string when ``max_val`` is 0, since no scale can be derived.
    """
    if max_val == 0:
        return ""
    # The glyph literals were empty, which made every bar render as "".
    # Compute the filled length once and truncate toward zero.
    filled = int(width * value / max_val)
    return "█" * filled + "░" * (width - filled)
def stacked_bar(code, comment, blank, total, width=30):
    """Render a three-segment bar: █ code, ▓ comment, ░ blank.

    Args:
        code: Number of code lines.
        comment: Number of comment lines.
        blank: Number of blank lines (only used implicitly: the blank
            segment takes whatever width the other two leave over).
        total: Total number of lines; the scale for the segments.
        width: Total bar length in characters.

    Returns:
        A string of ``width`` characters.  When ``total`` is 0 the bar is
        drawn as an all-light track so that rows stay aligned.
    """
    # The glyph literals were empty, so the bar never rendered; the glyphs
    # are restored from the legend above.
    if total == 0:
        return "░" * width
    code_cells = int(width * code / total)
    comment_cells = int(width * comment / total)
    # The last segment absorbs the rounding remainder so the bar is always
    # exactly `width` long.
    blank_cells = width - code_cells - comment_cells
    return "█" * code_cells + "▓" * comment_cells + "░" * blank_cells
def fmt(n):
    """Format a number with comma thousands separators."""
    return format(n, ",")
def _display_width(text):
    """Approximate how many terminal cells *text* occupies.

    East Asian wide and fullwidth characters are counted as two cells,
    every other character as one.
    """
    from unicodedata import east_asian_width

    return sum(2 if east_asian_width(ch) in ("W", "F") else 1 for ch in text)


def _rjust_cells(text, width):
    """Right-justify *text* to *width* terminal cells (never truncates)."""
    return " " * max(width - _display_width(text), 0) + text


def print_table(title, rows, headers, col_widths, grand_row=None):
    """Print a right-aligned text table to stdout.

    Args:
        title: Caption printed above the table.
        rows: Iterable of rows; each row is a sequence of cell values
            (converted with ``str``).
        headers: Column header labels.
        col_widths: Width of each column, in terminal cells.
        grand_row: Optional summary row, printed below a second rule.

    Cells are padded by display width rather than by character count, so
    columns stay aligned when labels contain double-width (CJK) characters.
    """
    def render(cells):
        return "".join(_rjust_cells(str(v), w) for v, w in zip(cells, col_widths))

    print(f"\n📁 {title}")
    header_line = render(headers)
    print(header_line)
    # The rule literal was empty, which printed a blank line instead of a
    # separator; draw it as wide as the header appears on screen.
    rule = "─" * _display_width(header_line)
    print(rule)
    for row in rows:
        print(render(row))
    if grand_row:
        print(rule)
        print(render(grand_row))
_COUNT_KEYS = ("total", "code", "comment", "blank")


def _empty_bucket():
    """Return a zeroed counter dict for one group (directory or extension)."""
    return {"files": 0, "total": 0, "code": 0, "comment": 0, "blank": 0}


def _comment_rate(bucket):
    """Return comment lines as a percentage of code + comment lines."""
    return bucket["comment"] / max(bucket["code"] + bucket["comment"], 1) * 100


def _group_stats(file_results, key_of):
    """Sum per-file counters into buckets keyed by ``key_of(result)``."""
    stats = defaultdict(_empty_bucket)
    for r in file_results:
        bucket = stats[key_of(r)]
        bucket["files"] += 1
        for k in _COUNT_KEYS:
            bucket[k] += r[k]
    return stats


def _by_code_desc(stats):
    """Return ``(label, bucket)`` pairs ordered by code lines, largest first."""
    return sorted(stats.items(), key=lambda item: item[1]["code"], reverse=True)


def _table_row(label, bucket):
    """Build one table row (all cells as strings) for *bucket*."""
    return [
        label,
        str(bucket["files"]),
        fmt(bucket["code"]),
        fmt(bucket["comment"]),
        fmt(bucket["blank"]),
        f"{_comment_rate(bucket):.1f}%",
    ]


def _print_distribution(heading, ordered, grand_code, label_width):
    """Print one bar per group showing its share of all code lines."""
    print(heading)
    max_code = max(b["code"] for _, b in ordered)
    for label, b in ordered:
        pct = b["code"] / max(grand_code, 1) * 100
        print(f" {label:<{label_width}} {bar(b['code'], max_code, 30)} {pct:5.1f}%")


def main():
    """Scan a directory tree and print code statistics to stdout.

    The directory is taken from ``sys.argv[1]`` and defaults to the current
    directory.  Exits with status 1 when the argument is not a directory.
    The report contains an overall summary, a table and bar chart grouped by
    top-level directory, the same grouped by file extension, and a stacked
    code/comment/blank bar per top-level directory.
    """
    target = Path(sys.argv[1]) if len(sys.argv) > 1 else Path(".")
    if not target.is_dir():
        print(f"❌ 不是有效目录: {target}")
        sys.exit(1)

    # Collect the per-file analysis results.
    file_results = []
    for filepath in target.rglob("*"):
        rel = filepath.relative_to(target)
        # Match skip names against the path *inside* the target only.
        # Checking the full path made every file be skipped whenever the
        # target itself, or one of its ancestors, was named e.g. "build",
        # "env" or "target".
        if should_skip(rel) or not filepath.is_file():
            continue
        result = analyze_file(filepath)
        if result:
            file_results.append({"path": rel, **result})

    if not file_results:
        print("未找到可分析的代码文件。")
        return

    # Overall totals.
    grand = {"files": len(file_results), "total": 0, "code": 0, "comment": 0, "blank": 0}
    for r in file_results:
        for k in _COUNT_KEYS:
            grand[k] += r[k]
    comment_rate = _comment_rate(grand)
    code_share = grand["code"] / max(grand["total"], 1) * 100
    grand_cells = _table_row("合计", grand)

    print(f"\n📊 代码统计 — {target.resolve()}")
    print(f" {grand['files']} 个文件 | {fmt(grand['total'])} 行 (代码 {fmt(grand['code'])} / 注释 {fmt(grand['comment'])} / 空行 {fmt(grand['blank'])})")
    print(f" 代码占比 {code_share:.1f}% | 注释率 {comment_rate:.1f}%")

    # Grouped by top-level directory; files directly in the target go to "[root]".
    dir_stats = _group_stats(
        file_results,
        lambda r: r["path"].parts[0] if len(r["path"].parts) > 1 else "[root]",
    )
    dirs_ordered = _by_code_desc(dir_stats)
    print_table(
        "按目录分组",
        [_table_row(d, b) for d, b in dirs_ordered],
        ["目录", "文件数", "代码行", "注释行", "空行", "注释率"],
        [16, 8, 10, 10, 10, 9],
        grand_row=grand_cells,
    )
    _print_distribution("\n📊 目录代码量分布", dirs_ordered, grand["code"], 14)

    # Grouped by file extension.
    ext_stats = _group_stats(file_results, lambda r: r["ext"])
    exts_ordered = _by_code_desc(ext_stats)
    print_table(
        "按文件类型",
        [_table_row(ext, b) for ext, b in exts_ordered],
        ["类型", "文件数", "代码行", "注释行", "空行", "注释率"],
        [10, 8, 10, 10, 10, 9],
        grand_row=grand_cells,
    )
    _print_distribution("\n📊 类型代码量分布", exts_ordered, grand["code"], 8)

    # Stacked code/comment/blank composition per top-level directory.
    print("\n📊 目录代码结构(█代码 ▓注释 ░空行)")
    for d, b in dirs_ordered:
        print(f" {d:<14} {stacked_bar(b['code'], b['comment'], b['blank'], b['total'], 40)}")
    print()


if __name__ == "__main__":
    main()