mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Three problems fixed: 1. bobashopcashier missing from v0.9.0 contributor list despite authoring the gateway drain PR (#7290, salvaged into #7503). Their email (kennyx102@gmail.com) was missing from AUTHOR_MAP. 2. release.py only scanned git commit authors, missing Co-authored-by trailers. Now parse_coauthors() extracts trailers from commit bodies. 3. No mechanism to detect contributors from salvaged PRs (where original author only appears in PR description, not git log). Changes: - scripts/release.py: add kennyx102@gmail.com to AUTHOR_MAP, enhance get_commits() to parse Co-authored-by trailers, filter AI assistants (Claude, Copilot, Cursor Agent) from co-author lists - scripts/contributor_audit.py: new script that cross-references git authors, co-author trailers, and salvaged PR descriptions. Reports unknown emails and contributors missing from release notes. - RELEASE_v0.9.0.md: add bobashopcashier to community contributors Usage: python scripts/contributor_audit.py --since-tag v2026.4.8 python scripts/contributor_audit.py --since-tag v2026.4.8 --release-file RELEASE_v0.9.0.md
424 lines
14 KiB
Python
424 lines
14 KiB
Python
#!/usr/bin/env python3
|
|
"""Contributor Audit Script
|
|
|
|
Cross-references git authors, Co-authored-by trailers, and salvaged PR
|
|
descriptions to find any contributors missing from the release notes.
|
|
|
|
Usage:
|
|
# Basic audit since a tag
|
|
python scripts/contributor_audit.py --since-tag v2026.4.8
|
|
|
|
# Audit with a custom endpoint
|
|
python scripts/contributor_audit.py --since-tag v2026.4.8 --until v2026.4.13
|
|
|
|
# Compare against a release notes file
|
|
python scripts/contributor_audit.py --since-tag v2026.4.8 --release-file RELEASE_v0.9.0.md
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
from collections import defaultdict
|
|
from pathlib import Path
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Import AUTHOR_MAP and resolve_author from the sibling release.py module
|
|
# ---------------------------------------------------------------------------
|
|
SCRIPT_DIR = Path(__file__).resolve().parent
|
|
sys.path.insert(0, str(SCRIPT_DIR))
|
|
|
|
from release import AUTHOR_MAP, resolve_author # noqa: E402
|
|
|
|
REPO_ROOT = SCRIPT_DIR.parent
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# AI assistants, bots, and machine accounts to exclude from contributor lists
|
|
# ---------------------------------------------------------------------------
|
|
IGNORED_PATTERNS = [
|
|
re.compile(r"^Claude", re.IGNORECASE),
|
|
re.compile(r"^Copilot$", re.IGNORECASE),
|
|
re.compile(r"^Cursor\s+Agent$", re.IGNORECASE),
|
|
re.compile(r"^GitHub\s*Actions?$", re.IGNORECASE),
|
|
re.compile(r"^dependabot", re.IGNORECASE),
|
|
re.compile(r"^renovate", re.IGNORECASE),
|
|
re.compile(r"^Hermes\s+(Agent|Audit)$", re.IGNORECASE),
|
|
re.compile(r"^Ubuntu$", re.IGNORECASE),
|
|
]
|
|
|
|
IGNORED_EMAILS = {
|
|
"noreply@anthropic.com",
|
|
"noreply@github.com",
|
|
"cursoragent@cursor.com",
|
|
"hermes@nousresearch.com",
|
|
"hermes-audit@example.com",
|
|
"hermes@habibilabs.dev",
|
|
}
|
|
|
|
|
|
def is_ignored(handle: str, email: str = "") -> bool:
|
|
"""Return True if this contributor is a bot/AI/machine account."""
|
|
if email in IGNORED_EMAILS:
|
|
return True
|
|
for pattern in IGNORED_PATTERNS:
|
|
if pattern.search(handle):
|
|
return True
|
|
return False
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def git(*args, cwd=None):
|
|
"""Run a git command and return stdout."""
|
|
result = subprocess.run(
|
|
["git"] + list(args),
|
|
capture_output=True,
|
|
text=True,
|
|
cwd=cwd or str(REPO_ROOT),
|
|
)
|
|
if result.returncode != 0:
|
|
print(f" [warn] git {' '.join(args)} failed: {result.stderr.strip()}", file=sys.stderr)
|
|
return ""
|
|
return result.stdout.strip()
|
|
|
|
|
|
def gh_pr_list():
|
|
"""Fetch merged PRs from GitHub using the gh CLI.
|
|
|
|
Returns a list of dicts with keys: number, title, body, author.
|
|
Returns an empty list if gh is not available or the call fails.
|
|
"""
|
|
try:
|
|
result = subprocess.run(
|
|
[
|
|
"gh", "pr", "list",
|
|
"--repo", "NousResearch/hermes-agent",
|
|
"--state", "merged",
|
|
"--json", "number,title,body,author,mergedAt",
|
|
"--limit", "300",
|
|
],
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=60,
|
|
)
|
|
if result.returncode != 0:
|
|
print(f" [warn] gh pr list failed: {result.stderr.strip()}", file=sys.stderr)
|
|
return []
|
|
return json.loads(result.stdout)
|
|
except FileNotFoundError:
|
|
print(" [warn] 'gh' CLI not found — skipping salvaged PR scan.", file=sys.stderr)
|
|
return []
|
|
except subprocess.TimeoutExpired:
|
|
print(" [warn] gh pr list timed out — skipping salvaged PR scan.", file=sys.stderr)
|
|
return []
|
|
except json.JSONDecodeError:
|
|
print(" [warn] gh pr list returned invalid JSON — skipping salvaged PR scan.", file=sys.stderr)
|
|
return []
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Contributor collection
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Patterns that indicate salvaged/cherry-picked/co-authored work in PR bodies
|
|
SALVAGE_PATTERNS = [
|
|
# "Salvaged from @username" or "Salvaged from #123"
|
|
re.compile(r"[Ss]alvaged\s+from\s+@(\w[\w-]*)"),
|
|
re.compile(r"[Ss]alvaged\s+from\s+#(\d+)"),
|
|
# "Cherry-picked from @username"
|
|
re.compile(r"[Cc]herry[- ]?picked\s+from\s+@(\w[\w-]*)"),
|
|
# "Based on work by @username"
|
|
re.compile(r"[Bb]ased\s+on\s+work\s+by\s+@(\w[\w-]*)"),
|
|
# "Original PR by @username"
|
|
re.compile(r"[Oo]riginal\s+PR\s+by\s+@(\w[\w-]*)"),
|
|
# "Co-authored with @username"
|
|
re.compile(r"[Cc]o[- ]?authored\s+with\s+@(\w[\w-]*)"),
|
|
]
|
|
|
|
# Pattern for Co-authored-by trailers in commit messages
|
|
CO_AUTHORED_RE = re.compile(
|
|
r"Co-authored-by:\s*(.+?)\s*<([^>]+)>",
|
|
re.IGNORECASE,
|
|
)
|
|
|
|
|
|
def collect_commit_authors(since_tag, until="HEAD"):
|
|
"""Collect contributors from git commit authors.
|
|
|
|
Returns:
|
|
contributors: dict mapping github_handle -> set of source labels
|
|
unknown_emails: dict mapping email -> git name (for emails not in AUTHOR_MAP)
|
|
"""
|
|
range_spec = f"{since_tag}..{until}"
|
|
log = git(
|
|
"log", range_spec,
|
|
"--format=%H|%an|%ae|%s",
|
|
"--no-merges",
|
|
)
|
|
|
|
contributors = defaultdict(set)
|
|
unknown_emails = {}
|
|
|
|
if not log:
|
|
return contributors, unknown_emails
|
|
|
|
for line in log.split("\n"):
|
|
if not line.strip():
|
|
continue
|
|
parts = line.split("|", 3)
|
|
if len(parts) != 4:
|
|
continue
|
|
_sha, name, email, _subject = parts
|
|
|
|
handle = resolve_author(name, email)
|
|
# resolve_author returns "@handle" or plain name
|
|
if handle.startswith("@"):
|
|
contributors[handle.lstrip("@")].add("commit")
|
|
else:
|
|
# Could not resolve — record as unknown
|
|
contributors[handle].add("commit")
|
|
unknown_emails[email] = name
|
|
|
|
return contributors, unknown_emails
|
|
|
|
|
|
def collect_co_authors(since_tag, until="HEAD"):
|
|
"""Collect contributors from Co-authored-by trailers in commit messages.
|
|
|
|
Returns:
|
|
contributors: dict mapping github_handle -> set of source labels
|
|
unknown_emails: dict mapping email -> git name
|
|
"""
|
|
range_spec = f"{since_tag}..{until}"
|
|
# Get full commit messages to scan for trailers
|
|
log = git(
|
|
"log", range_spec,
|
|
"--format=__COMMIT__%H%n%b",
|
|
"--no-merges",
|
|
)
|
|
|
|
contributors = defaultdict(set)
|
|
unknown_emails = {}
|
|
|
|
if not log:
|
|
return contributors, unknown_emails
|
|
|
|
for line in log.split("\n"):
|
|
match = CO_AUTHORED_RE.search(line)
|
|
if match:
|
|
name = match.group(1).strip()
|
|
email = match.group(2).strip()
|
|
handle = resolve_author(name, email)
|
|
if handle.startswith("@"):
|
|
contributors[handle.lstrip("@")].add("co-author")
|
|
else:
|
|
contributors[handle].add("co-author")
|
|
unknown_emails[email] = name
|
|
|
|
return contributors, unknown_emails
|
|
|
|
|
|
def collect_salvaged_contributors(since_tag, until="HEAD"):
|
|
"""Scan merged PR bodies for salvage/cherry-pick/co-author attribution.
|
|
|
|
Uses the gh CLI to fetch PRs, then filters to the date range defined
|
|
by since_tag..until and scans bodies for salvage patterns.
|
|
|
|
Returns:
|
|
contributors: dict mapping github_handle -> set of source labels
|
|
pr_refs: dict mapping github_handle -> list of PR numbers where found
|
|
"""
|
|
contributors = defaultdict(set)
|
|
pr_refs = defaultdict(list)
|
|
|
|
# Determine the date range from git tags/refs
|
|
since_date = git("log", "-1", "--format=%aI", since_tag)
|
|
if until == "HEAD":
|
|
until_date = git("log", "-1", "--format=%aI", "HEAD")
|
|
else:
|
|
until_date = git("log", "-1", "--format=%aI", until)
|
|
|
|
if not since_date:
|
|
print(f" [warn] Could not resolve date for {since_tag}", file=sys.stderr)
|
|
return contributors, pr_refs
|
|
|
|
prs = gh_pr_list()
|
|
if not prs:
|
|
return contributors, pr_refs
|
|
|
|
for pr in prs:
|
|
# Filter by merge date if available
|
|
merged_at = pr.get("mergedAt", "")
|
|
if merged_at and since_date:
|
|
if merged_at < since_date:
|
|
continue
|
|
if until_date and merged_at > until_date:
|
|
continue
|
|
|
|
body = pr.get("body") or ""
|
|
pr_number = pr.get("number", "?")
|
|
|
|
# Also credit the PR author
|
|
pr_author = pr.get("author", {})
|
|
pr_author_login = pr_author.get("login", "") if isinstance(pr_author, dict) else ""
|
|
|
|
for pattern in SALVAGE_PATTERNS:
|
|
for match in pattern.finditer(body):
|
|
value = match.group(1)
|
|
# If it's a number, it's a PR reference — skip for now
|
|
# (would need another API call to resolve PR author)
|
|
if value.isdigit():
|
|
continue
|
|
contributors[value].add("salvage")
|
|
pr_refs[value].append(pr_number)
|
|
|
|
return contributors, pr_refs
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Release file comparison
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def check_release_file(release_file, all_contributors):
|
|
"""Check which contributors are mentioned in the release file.
|
|
|
|
Returns:
|
|
mentioned: set of handles found in the file
|
|
missing: set of handles NOT found in the file
|
|
"""
|
|
try:
|
|
content = Path(release_file).read_text()
|
|
except FileNotFoundError:
|
|
print(f" [error] Release file not found: {release_file}", file=sys.stderr)
|
|
return set(), set(all_contributors)
|
|
|
|
mentioned = set()
|
|
missing = set()
|
|
content_lower = content.lower()
|
|
|
|
for handle in all_contributors:
|
|
# Check for @handle or just handle (case-insensitive)
|
|
if f"@{handle.lower()}" in content_lower or handle.lower() in content_lower:
|
|
mentioned.add(handle)
|
|
else:
|
|
missing.add(handle)
|
|
|
|
return mentioned, missing
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Main
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser(
|
|
description="Audit contributors across git history, co-author trailers, and salvaged PRs.",
|
|
)
|
|
parser.add_argument(
|
|
"--since-tag",
|
|
required=True,
|
|
help="Git tag to start from (e.g., v2026.4.8)",
|
|
)
|
|
parser.add_argument(
|
|
"--until",
|
|
default="HEAD",
|
|
help="Git ref to end at (default: HEAD)",
|
|
)
|
|
parser.add_argument(
|
|
"--release-file",
|
|
default=None,
|
|
help="Path to a release notes file to check for missing contributors",
|
|
)
|
|
args = parser.parse_args()
|
|
|
|
print(f"=== Contributor Audit: {args.since_tag}..{args.until} ===")
|
|
print()
|
|
|
|
# ---- 1. Git commit authors ----
|
|
print("[1/3] Scanning git commit authors...")
|
|
commit_contribs, commit_unknowns = collect_commit_authors(args.since_tag, args.until)
|
|
print(f" Found {len(commit_contribs)} contributor(s) from commits.")
|
|
|
|
# ---- 2. Co-authored-by trailers ----
|
|
print("[2/3] Scanning Co-authored-by trailers...")
|
|
coauthor_contribs, coauthor_unknowns = collect_co_authors(args.since_tag, args.until)
|
|
print(f" Found {len(coauthor_contribs)} contributor(s) from co-author trailers.")
|
|
|
|
# ---- 3. Salvaged PRs ----
|
|
print("[3/3] Scanning salvaged/cherry-picked PR descriptions...")
|
|
salvage_contribs, salvage_pr_refs = collect_salvaged_contributors(args.since_tag, args.until)
|
|
print(f" Found {len(salvage_contribs)} contributor(s) from salvaged PRs.")
|
|
|
|
# ---- Merge all contributors ----
|
|
all_contributors = defaultdict(set)
|
|
for handle, sources in commit_contribs.items():
|
|
all_contributors[handle].update(sources)
|
|
for handle, sources in coauthor_contribs.items():
|
|
all_contributors[handle].update(sources)
|
|
for handle, sources in salvage_contribs.items():
|
|
all_contributors[handle].update(sources)
|
|
|
|
# Merge unknown emails
|
|
all_unknowns = {}
|
|
all_unknowns.update(commit_unknowns)
|
|
all_unknowns.update(coauthor_unknowns)
|
|
|
|
# Filter out AI assistants, bots, and machine accounts
|
|
ignored = {h for h in all_contributors if is_ignored(h)}
|
|
for h in ignored:
|
|
del all_contributors[h]
|
|
# Also filter unknowns by email
|
|
all_unknowns = {e: n for e, n in all_unknowns.items() if not is_ignored(n, e)}
|
|
|
|
# ---- Output ----
|
|
print()
|
|
print(f"=== All Contributors ({len(all_contributors)}) ===")
|
|
print()
|
|
|
|
# Sort by handle, case-insensitive
|
|
for handle in sorted(all_contributors.keys(), key=str.lower):
|
|
sources = sorted(all_contributors[handle])
|
|
source_str = ", ".join(sources)
|
|
extra = ""
|
|
if handle in salvage_pr_refs:
|
|
pr_nums = salvage_pr_refs[handle]
|
|
extra = f" (PRs: {', '.join(f'#{n}' for n in pr_nums)})"
|
|
print(f" @{handle} [{source_str}]{extra}")
|
|
|
|
# ---- Unknown emails ----
|
|
if all_unknowns:
|
|
print()
|
|
print(f"=== Unknown Emails ({len(all_unknowns)}) ===")
|
|
print("These emails are not in AUTHOR_MAP and should be added:")
|
|
print()
|
|
for email, name in sorted(all_unknowns.items()):
|
|
print(f' "{email}": "{name}",')
|
|
|
|
# ---- Release file comparison ----
|
|
if args.release_file:
|
|
print()
|
|
print(f"=== Release File Check: {args.release_file} ===")
|
|
print()
|
|
mentioned, missing = check_release_file(args.release_file, all_contributors.keys())
|
|
print(f" Mentioned in release notes: {len(mentioned)}")
|
|
print(f" Missing from release notes: {len(missing)}")
|
|
if missing:
|
|
print()
|
|
print(" Contributors NOT mentioned in the release file:")
|
|
for handle in sorted(missing, key=str.lower):
|
|
sources = sorted(all_contributors[handle])
|
|
print(f" @{handle} [{', '.join(sources)}]")
|
|
else:
|
|
print()
|
|
print(" All contributors are mentioned in the release file!")
|
|
|
|
print()
|
|
print("Done.")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|