aboutsummaryrefslogtreecommitdiff
path: root/packages/git-blame-stats/git-blame-stats.py
diff options
context:
space:
mode:
author Franck Cuny <franck@fcuny.net> 2024-03-06 06:29:24 -0800
committer Franck Cuny <franck@fcuny.net> 2024-03-06 06:29:24 -0800
commit 1e4a5aa09c1c8f43722c9c260f011398799a8e8f (patch)
tree cd73e0fb8ba53bd21cee6ccf2dcc85639bbbb93f /packages/git-blame-stats/git-blame-stats.py
parent set correct git email in the profiles (diff)
download infra-1e4a5aa09c1c8f43722c9c260f011398799a8e8f.tar.gz
rename `tools` to `packages` to follow convention
The convention is to use `pkgs` or `packages` for overlays and definitions of custom packages. Since I'm already using `pkg` for Go, I prefer to use `packages` for my scripts.
Diffstat (limited to 'packages/git-blame-stats/git-blame-stats.py')
-rwxr-xr-x packages/git-blame-stats/git-blame-stats.py 95
1 files changed, 95 insertions, 0 deletions
diff --git a/packages/git-blame-stats/git-blame-stats.py b/packages/git-blame-stats/git-blame-stats.py
new file mode 100755
index 0000000..3cc4f4a
--- /dev/null
+++ b/packages/git-blame-stats/git-blame-stats.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+
import argparse
import subprocess
import sys
from typing import Any
+
+
# Command-line interface: a single optional positional argument naming the
# revision to analyse; HEAD is used when it is omitted.
parser = argparse.ArgumentParser()
parser.add_argument(
    "rev",
    nargs="?",
    default="HEAD",
    type=str,
    metavar="revision",
    help="the revision",
)
args = parser.parse_args()

# Accumulated statistics, keyed by author name.
authors: dict[str, Any] = {}
# Longest author name / e-mail seen so far; they become the column widths of
# the final report.  (The "lenght" spelling is kept because the rest of the
# script refers to these exact names.)
max_lenght_author: int = 0
max_lenght_email: int = 0
+
+
def get_files(rev):
    """Return the paths of all files recorded in the tree of ``rev``.

    Runs ``git ls-tree -r --name-only -z`` in the current working directory.
    ``-z`` makes git terminate every path with NUL and switches off path
    quoting, so names containing non-ASCII characters, quotes or newlines come
    back verbatim instead of as C-style quoted strings that a later
    ``git blame`` would not recognise.

    Raises ``subprocess.CalledProcessError`` if git exits with a non-zero
    status.
    """
    tree = subprocess.run(
        ["git", "ls-tree", "--name-only", "-r", "-z", rev],
        capture_output=True,
        check=True,
        encoding="utf-8",
    )
    # Each path is NUL-terminated, so the last element of the split is empty.
    return [path for path in tree.stdout.split("\0") if path]
+
+
def line_info(filename, rev):
    """Yield one block of ``git blame --line-porcelain`` output per source line.

    Each yielded block is a list of strings: the header lines git prints for
    one line of ``filename`` (the commit line first, then ``author``,
    ``author-mail`` and so on) followed by the line's content, which git
    prefixes with a TAB.  ``-w`` makes blame ignore whitespace differences
    when it looks for the commit a line came from.

    Raises ``subprocess.CalledProcessError`` if git exits with a non-zero
    status and ``UnicodeDecodeError`` if its output is not valid UTF-8.
    """
    blame = subprocess.run(
        ["git", "blame", "-w", "--line-porcelain", rev, "--", filename],
        capture_output=True,
        encoding="utf-8",
        check=True,
    )
    block = []
    # Split on "\n" only.  str.splitlines() also breaks on form feeds, vertical
    # tabs and several Unicode separators; any of those inside a source line
    # would cut its content record in two and push the remainder into the next
    # block, corrupting that block's commit line.  Carriage returns never get
    # this far: subprocess text mode has already translated them to "\n".
    # The empty string after the final newline is appended to a block that is
    # never yielded, so it is harmless.
    for line in blame.stdout.split("\n"):
        block.append(line)
        # The TAB-prefixed content line is always the last record of a block.
        if line.startswith("\t"):
            yield block
            block = []
+
+
files = get_files(args.rev)

# Walk every file of the revision and credit each non-blank line to the author
# that `git blame` reports for it.
for filename in files:
    try:
        # Pass the name exactly as git listed it: stripping whitespace would
        # make blame look at a different path than the one recorded below.
        for block in line_info(filename, args.rev):
            # The content line (TAB + text) closes every block; a bare TAB
            # means the source line is empty, and empty lines are not counted.
            if block[-1] == "\t":
                continue

            # The first record starts with the commit hash.
            commit = block[0].split()[0]
            author = ""
            author_email = ""
            for val in block[1:]:
                # "author " (with the space) so that "author-mail",
                # "author-time" and "author-tz" are not mistaken for the name.
                if val.startswith("author "):
                    author = " ".join(val.split()[1:])
                elif val.startswith("author-mail"):
                    author_email = " ".join(val.split()[1:])

            stats = authors.get(author)
            if stats is None:
                # The e-mail stored for an author is the first one seen.
                stats = authors[author] = {
                    "email": author_email,
                    "commits": set(),
                    "files": set(),
                    "lines": 0,
                }
            stats["commits"].add(commit)
            stats["files"].add(filename)
            stats["lines"] += 1

            max_lenght_author = max(max_lenght_author, len(author))
            max_lenght_email = max(max_lenght_email, len(author_email))
    except Exception as exc:
        # Best effort: a file git cannot blame, or whose output cannot be
        # decoded, must not abort the whole report -- but report it on stderr
        # instead of dropping it silently.
        print(f"warning: skipping {filename!r}: {exc}", file=sys.stderr)
+
# Print one row per author: name and e-mail padded to the widest value seen,
# followed by the number of lines, distinct commits and distinct files.
for author, stats in authors.items():
    email = stats["email"]
    line_count = stats["lines"]
    commit_count = len(stats["commits"])
    # Not called `files`, so the module-level list of file names is not
    # rebound to an integer.
    file_count = len(stats["files"])
    print(
        f"{author:{max_lenght_author}} {email:{max_lenght_email}} "
        f"{line_count:6} {commit_count:6} {file_count:6}"
    )