office-gobmx/bin/lo-commit-stat
Thorsten Behrens 418df9282d Slightly more robust removal of bug title prefix.
Change-Id: Ic37589222831d03ec48689a077b1eb16a9199385
2012-06-25 13:59:58 +02:00

494 lines
15 KiB
Perl
Executable file

#!/usr/bin/perl
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
if $running_under_some_shell;
#!/usr/bin/perl
use strict;
use LWP::UserAgent;
use utf8;
# Name of the main repository; its clone is the top directory itself.
my $main_repo = "core";

# Additional repositories; they are looked up in <topdir>/clone/<piece>.
my @pieces = qw(binfilter dictionaries help translations);

# Bug tracker prefix => base URL used when asking for bug titles.
my %bugzillas = (
    'fdo'  => "https://bugs.freedesktop.org/",
    'bnc'  => "https://bugzilla.novell.com/",
    'rhbz' => "https://bugzilla.redhat.com/",
    'i'    => "https://issues.apache.org/ooo/",
);
# Extracts all bug references from one line of a commit message.
#
# Recognized forms:
#   prefix#1234   (fdo#1234, rhbz#1234, ...; at least 4 digits)
#   #1234         (at least 4 digits; recorded as i#1234)
#   #i1234#       (recorded as i#1234)
#
# Every reference is normalized (n# -> bnc#, debian# -> deb#,
# easyhack# -> fdo#, fd0# -> fdo#), stored as a key in
# $pdata->{$piece}{$commit_id}{'bugs'} and removed from the line.
#
# Parameters:
#   $pdata     - hash ref with the collected data
#   $piece     - name of the piece (repository)
#   $commit_id - id of the commit the line belongs to
#   $line      - one line of the commit message
#
# Returns the line without the bug references.
sub search_bugs($$$$)
{
    my ($pdata, $piece, $commit_id, $line) = @_;

    while (1) {
        my $bug;
        my $bug_orig;

        # match fdo#123, rhz#123, i#123, #123
        # but match only bug number with >= 4 digits
        if ( $line =~ m/(\w+\#+\d{4,})/ ) {
            $bug_orig = $1;
            $bug = $1;
        # default to issuezilla for the #123 variant
        # but match only bug number with >= 4 digits
        } elsif ( $line =~ m/(\#)(\d{4,})/ ) {
            $bug_orig = $1 . $2;
            $bug = "i#$2";
        # match #i123#
        } elsif ( $line =~ m/(\#i)(\d+)(\#)/ ) {
            $bug_orig = $1 . $2 . $3;
            $bug = "i#$2";
        } else {
            # no more bug references on this line
            last;
        }

        # remove bug number from the comment; it will be added later a standardized way;
        # the matched text is quoted so that it is always taken literally
        my $bug_re = quotemeta($bug_orig);
        $line =~ s/[Rr]esolve[ds]:?\s*$bug_re\s*//;
        $line =~ s/\s*-\s*$bug_re\s*//;
        $line =~ s/\(?$bug_re\)?\s*[:,-]?\s*//;

        # bnc# is preferred over n# for novell bugs
        $bug =~ s/^n\#/bnc#/;
        # deb# is preferred over debian# for debian bugs
        $bug =~ s/^debian\#/deb#/;
        # easyhack# is sometimes used for fdo# - based easy hacks
        $bug =~ s/^easyhack\#/fdo#/;
        # someone mistyped fdo as fd0
        $bug =~ s/^fd0\#/fdo#/;

        # save the bug number; the 'bugs' hash is autovivified on demand
        # (the former "defined %hash" test is a fatal error in perl >= 5.22)
        $pdata->{$piece}{$commit_id}{'bugs'}{$bug} = 1;
    }

    return $line;
}
# Normalizes a commit summary line.
#
# Leading and trailing white space is stripped. The first character is
# lower-cased when it is directly followed by one or more lowercase ASCII
# letters that end at a word boundary.
#
# Returns the normalized line.
sub standardize_summary($)
{
    my ($text) = @_;

    # trim both ends
    $text =~ s/^\s*//;
    $text =~ s/\s*$//;

    # "Fix crash" -> "fix crash"; "XML export" and "A bug" stay untouched
    $text = lcfirst($text) if ( $text =~ m/(^.[a-z]+\b)/ );

    # FIXME: remove do at the end of line
    #        remove bug numbers
    return $text;
}
# Runs the given git command in a repository and stores the parsed commits.
#
# Parameters:
#   $pdata       - hash ref; the result is stored in $pdata->{$piece}
#   $repo_dir    - directory of the repository to analyze
#   $piece       - name under which the commits are stored
#   $branch_name - branch the repository is expected to be on
#   $git_command - git command line producing "git log"-like output
#
# For every commit, $pdata->{$piece}{<id>} gets:
#   'author'  - hash with 'name' and 'email'
#   'summary' - the first non-empty message line, with bug references
#               removed and passed through standardize_summary()
#   'bugs'    - filled by search_bugs() for every message line
# <id> is the first 20 characters of the commit hash.
#
# Dies when the repository is on another branch than $branch_name, when
# the command can't be started, or when a commit has two Author lines.
sub load_git_log($$$$$)
{
    my ($pdata, $repo_dir, $piece, $branch_name, $git_command) = @_;

    # "&&" makes sure that git never runs in a wrong directory when "cd" fails
    my $cmd = "cd $repo_dir && $git_command";
    my $commit_id;

    print STDERR "Analyzing log from the git repo: $piece...\n";

    my $repo_branch_name = get_branch_name($repo_dir);
    if ( $branch_name ne $repo_branch_name ) {
        die "Error: mismatch of branches:\n" .
            " main repo is on the branch: $branch_name\n" .
            " $piece repo is on the branch: $repo_branch_name\n";
    }

    # stderr is merged into the parsed output, as before
    open (my $git, '-|', "$cmd 2>&1") || die "Can't run $cmd: $!";
    %{$pdata->{$piece}} = ();

    while (my $line = <$git>) {
        chomp $line;

        if ( $line =~ m/^commit ([0-9a-z]{20})/ ) {
            # a new commit starts here
            $commit_id = "$1";
            %{$pdata->{$piece}{"$commit_id"}} = ();
            next;
        }

        if ( $line =~ /^Author:\s*([^\<]*)\<([^\>]*)>/ ) {
            # get rid of extra empty spaces;
            my $name = "$1";
            my $email = "$2";
            $name =~ s/\s+$//;
            die "Error: Author already defined for the commit {$commit_id}\n" if defined ($pdata->{$piece}{$commit_id}{'author'});
            %{$pdata->{$piece}{$commit_id}{'author'}} = ();
            $pdata->{$piece}{$commit_id}{'author'}{'name'} = "$name";
            $pdata->{$piece}{$commit_id}{'author'}{'email'} = "$email";
            next;
        }

        # ignore date line
        next if ( $line =~ /^Date:\s+/ );
        # ignore empty line
        next if ( $line =~ /^\s*$/ );

        $line = search_bugs($pdata, $piece, $commit_id, $line);
        # FIXME: need to be implemented
        # search_keywords($pdata, $line);

        # only the first message line of a commit becomes the summary
        unless (defined $pdata->{$piece}{$commit_id}{'summary'}) {
            $pdata->{$piece}{$commit_id}{'summary'} = standardize_summary($line);
        }
    }

    # closing a pipe fails when the command exited with a non-zero status
    close ($git) || warn "Warning: \"$cmd\" did not finish successfully\n";
}
# Detects the repository name from the first "url = ..." line found in
# <repo_dir>/.git/config.
#
# Parameters:
#   $repo_dir - directory with the git clone
#
# Returns the part of the URL behind the last slash.
# Dies when the config file can't be read or contains no url line.
sub get_repo_name($)
{
    my $repo_dir = shift;
    my $config_path = "$repo_dir/.git/config";

    open (my $config, '<', $config_path) ||
        die "can't open \"$config_path\" for reading: $!\n";

    while (my $line = <$config>) {
        chomp $line;
        if ( $line =~ /^\s*url\s*=\s*(\S+)$/ ) {
            my $repo_name = "$1";
            # strip everything up to the last slash; this must be bound
            # to $repo_name with "=~", a plain "=" would process $_ instead
            $repo_name =~ s/.*\///g;
            close $config;
            return "$repo_name";
        }
    }
    close $config;

    die "Error: can't find repo name in \"$config_path\"\n";
}
# Loads the git log either from one piece or from all known repositories.
#
# Parameters:
#   $pdata       - hash ref that gets the loaded data
#   $top_dir     - directory with the clone of the main repository
#   $piece       - piece to load; undef means the main repo plus all @pieces
#   $branch_name - branch name passed on to load_git_log()
#   $git_command - git command passed on to load_git_log()
#
# The main repository is read from $top_dir, any other piece from
# $top_dir/clone/<piece>.
sub load_data($$$$$)
{
    my ($pdata, $top_dir, $piece, $branch_name, $git_command) = @_;

    if (! defined $piece) {
        # everything: the main repository first, the other pieces afterwards
        load_git_log($pdata, $top_dir, $main_repo, $branch_name, $git_command);
        foreach my $name (@pieces) {
            load_git_log($pdata, "$top_dir/clone/$name", $name, $branch_name, $git_command);
        }
    } else {
        # just the selected piece
        my $piece_dir = ("$piece" eq "$main_repo") ? "$top_dir" : "$top_dir/clone/$piece";
        load_git_log($pdata, $piece_dir, $piece, $branch_name, $git_command);
    }
}
# Detects the current branch of a git repository.
#
# Runs "git branch" in the given directory and takes the first word that
# follows the "*" marker; when more lines are marked, the last one wins.
#
# Parameters:
#   $top_dir - directory with the git clone
#
# Returns the branch name; dies when no marked line is found.
sub get_branch_name($)
{
    my ($top_dir) = @_;

    my $cmd = "cd $top_dir && git branch";
    my $current;

    open (my $git, '-|', "$cmd 2>&1") || die "Can't run $cmd: $!";
    while (my $line = <$git>) {
        chomp $line;
        $current = "$1" if ( $line =~ m/^\*\s*(\S+)/ );
    }
    close $git;

    defined ($current) || die "Error: did not detect git branch name in $top_dir\n";
    return $current;
}
# Opens the log file for writing.
#
# The file is named <log_prefix>-<branch_name>-<log_suffix>.log and is put
# into $log_dir when that is defined. When the file already exists, the
# user is asked on STDIN whether it may be overwritten; an empty answer
# counts as "y" and any answer other than "y"/"Y" makes the script die.
#
# Parameters:
#   $log_dir, $log_prefix, $log_suffix, $branch_name - parts of the file name
#   $top_dir - not used here
#
# Returns the opened file handle.
sub open_log_file($$$$$)
{
    my ($log_dir, $log_prefix, $log_suffix, $top_dir, $branch_name) = @_;

    my $basename = "$log_prefix-$branch_name-$log_suffix.log";
    my $logfilename = (defined $log_dir) ? "$log_dir/$basename" : $basename;

    if (-f $logfilename) {
        print "WARNING: The log file already exists: $logfilename\n",
              "Do you want to overwrite it? (Y/n)?\n";
        my $answer = <STDIN>;
        chomp $answer;
        $answer ||= "y";
        die "Please, rename the file or choose another log suffix\n" unless ( lc($answer) eq "y" );
    }

    open(my $log, '>', $logfilename) || die "Can't open \"$logfilename\" for writing: $!\n";
    return $log;
}
# Prints one summary line of the commit statistic.
#
# Parameters:
#   $summary      - summary text; nothing is printed when it is empty
#   $ppiece_title - ref to the piece title; the title is printed ahead of
#                   the summary and the referenced scalar is set to undef,
#                   so that it is printed only once
#   $pbugs        - hash ref; the keys are the related bug ids
#   $pauthors     - hash ref; the keys are the author names
#   $prefix       - text printed in front of the summary
#   $log          - output file handle
#
# Output line: <prefix><summary> (<bugs>) [<authors>]
sub print_summary_in_stat($$$$$$)
{
    my ($summary, $ppiece_title, $pbugs, $pauthors, $prefix, $log) = @_;

    return if ( $summary eq "" );

    # print piece title if not done yet
    my $title = ${$ppiece_title};
    if ( defined $title ) {
        print $log "$title\n";
        ${$ppiece_title} = undef;
    }

    # both lists are optional
    my $bugs = "";
    $bugs = " (" . join (", ", keys %{$pbugs}) . ")" if ( %{$pbugs} );
    my $authors = "";
    $authors = " [" . join (", ", keys %{$pauthors}) . "]" if ( %{$pauthors} );

    # finally print the summary line
    print $log $prefix, $summary, $bugs, $authors, "\n";
}
# Prints the commit statistic of all pieces.
#
# Pieces are printed in alphabetical order; commits of a piece are sorted
# case-insensitively by their summary. Commits with the same summary are
# merged into a single line that lists all their bugs and authors.
#
# Parameters:
#   $pdata - hash ref with the data filled by load_git_log()
#   $log   - output file handle
sub print_stat($$)
{
    my ($pdata, $log) = @_;

    foreach my $piece ( sort { $a cmp $b } keys %{$pdata}) {
        my $commits = $pdata->{$piece};
        my $piece_title = "+ $piece";

        # skip pieces without any entries
        next unless ( %{$commits} );

        my @ids = sort { lc $commits->{$a}{'summary'} cmp lc $commits->{$b}{'summary'} } keys %{$commits};

        my $current_summary = "";
        my %authors = ();
        my %bugs = ();
        foreach my $id (@ids) {
            my $summary = $commits->{$id}{'summary'};

            # a different summary starts a new group; flush the finished one
            if ($summary ne $current_summary) {
                print_summary_in_stat($current_summary, \$piece_title, \%bugs, \%authors, " + ", $log);
                $current_summary = $summary;
                %authors = ();
                %bugs = ();
            }

            # collect bug numbers
            if (defined $commits->{$id}{'bugs'}) {
                $bugs{$_} = 1 foreach (keys %{$commits->{$id}{'bugs'}});
            }

            # collect author names
            my $author = $commits->{$id}{'author'}{'name'};
            $authors{$author} = 1;
        }

        # flush the last group
        print_summary_in_stat($current_summary, \$piece_title, \%bugs, \%authors, " + ", $log);
    }
}
# Looks up the title of a bug in the related bug tracker.
#
# Parameters:
#   $bug     - bug id, for example fdo#12345
#   $summary - fallback text, usually the commit summary
#
# The bug id and the outcome are printed on STDOUT. The page title of
# <tracker url>show_bug.cgi?id=<number> is returned when the tracker prefix
# is listed in %bugzillas, the page can be fetched, and the title starts
# with "Bug <number> <word> " (that prefix is cut off). $summary is
# returned in all other cases.
sub get_bug_name($$)
{
    my ($bug, $summary) = @_;

    print "$bug: ";

    $bug =~ m/(?:(\w*)\#+(\d+))/; # fdo#12345
    my ($bugzilla, $bug_number) = ($1, $2); # fdo, 12345

    # unknown bug tracker
    my $base_url = $bugzillas{$bugzilla};
    unless ( $base_url ) {
        print "\n";
        return $summary;
    }

    my $ua = LWP::UserAgent->new;
    $ua->timeout(10);
    $ua->env_proxy;
    my $response = $ua->get($base_url . "show_bug.cgi?id=" . $bug_number);

    # the page could not be fetched
    unless ($response->is_success) {
        print "\n";
        return $summary;
    }

    my $title = $response->title;
    if ( $title =~ s/^Bug \d+ \S+ // ) {
        print "$title\n";
        return $title;
    }

    print "warning: not found; using commit message (only got $title)\n";
    return $summary;
}
# Prints the list of fixed bugs with their descriptions and fixers.
#
# Parameters:
#   $pdata        - hash ref with the data filled by load_git_log()
#   $log          - output file handle
#   $convert_func - code ref called with the bug id; its result is printed
#                   in place of the id
#
# The description is the value returned by get_bug_name(), which falls
# back to the summary of one of the related commits.
sub print_bugs($$$)
{
    my ($pdata, $log, $convert_func) = @_;

    # associate bugs with their summaries and fixers
    my %bugs = ();
    foreach my $commits ( values %{$pdata} ) {
        foreach my $commit ( values %{$commits} ) {
            foreach my $bug (keys %{$commit->{'bugs'}}) {
                my $author = $commit->{'author'}{'name'};
                $bugs{$bug}{'summary'} = $commit->{'summary'};
                $bugs{$bug}{'author'}{$author} = 1;
            }
        }
    }

    # try to replace summaries with bug names from bugzilla
    print "Getting bug titles:\n";
    foreach my $bug ( sort { $a cmp $b } keys %bugs) {
        my $entry = $bugs{$bug};
        $entry->{'summary'} = get_bug_name($bug, $entry->{'summary'});
    }

    # print
    foreach my $bug ( sort { $a cmp $b } keys %bugs) {
        my $summary = $bugs{$bug}{'summary'};
        my $fixers = $bugs{$bug}{'author'};
        my $authors = "";
        $authors = " [" . join (", ", keys %{$fixers}) . "]" if ( %{$fixers} );
        print $log $convert_func->($bug), " ", $summary, $authors, "\n";
    }
}
# Prints the sorted list of all bug ids found in the loaded commits,
# one id per line and without duplicates.
#
# Parameters:
#   $pdata - hash ref with the data filled by load_git_log()
#   $log   - output file handle
sub print_bugnumbers($$)
{
    my ($pdata, $log) = @_;

    # just collect bugs
    my %seen = ();
    foreach my $commits ( values %{$pdata} ) {
        foreach my $commit ( values %{$commits} ) {
            $seen{$_} = 1 foreach (keys %{$commit->{'bugs'}});
        }
    }

    my @sorted = sort { $a cmp $b } keys %seen;
    print $log join ("\n", @sorted), "\n";
}
########################################################################
# help

# Prints the description of the command line on STDOUT.
sub usage()
{
    print <<'END_OF_USAGE';
This script generates LO git commit summary

Usage: lo-commit-stat [--help] [--no-pieces] [--piece=<piece>] --log-dir=<dir> --log-suffix=<string> topdir [git_arg...]

Options:
 --help print this help
 --no-pieces read changes just from the main repository, ignore other cloned repos
 --piece=<piece> summarize just changes from the given piece
 --log-dir=<dir> directory where to put the generated log
 --log-suffix=<string> suffix of the log file name; the result will be
 commit-log-<branch>-<log-name-suffix>.log; the branch name
 is detected automatically
 --bugs print just bug fixes
 --wikibugs print just bug fixes, use wiki markup
 --bug-numbers print just fixed bug numbers
 --rev-list use "git rev-list" instead of "git log"; useful to check
 differences between branches
 topdir directory with the libreoffice/core clone; the piece repos
 must be cloned in the main-repo-root/clone/<piece> subdirectories
 git_arg extra parameters passed to the git command to define
 the area of interest; The default command is "git log" and
 parameters might be, for example, --after="2010-09-27" or
 TAG..HEAD; with the option --rev-list, useful might be, for
 example origin/master ^origin/libreoffice-3-3
END_OF_USAGE
}
#######################################################################
#######################################################################
# MAIN
#######################################################################
#######################################################################

# Parses the command line, loads the git log(s), and writes the report
# selected by the print mode into the log file.

my $piece;
my $top_dir;
my $log_prefix = "commit-log";
my $log_dir;
my $log_suffix;
my $log;
my $branch_name;
my $git_command = "git log";
my @git_args;
my %data;
my $print_mode = "normal";

# Options are anchored at the beginning of the argument, so that a git
# argument that just contains an option-like text is not misinterpreted.
foreach my $arg (@ARGV) {
    if ($arg eq '--help') {
        usage();
        exit;
    } elsif ($arg eq '--no-pieces') {
        # only the main repository
        $piece = $main_repo;
    } elsif ($arg =~ m/^--piece=(.*)/) {
        $piece = $1;
    } elsif ($arg =~ m/^--log-suffix=(.*)/) {
        $log_suffix = "$1";
    } elsif ($arg =~ m/^--log-dir=(.*)/) {
        $log_dir = "$1";
    } elsif ($arg eq '--bugs') {
        $log_prefix = "bugfixes";
        $print_mode = "bugs";
    } elsif ($arg eq '--wikibugs') {
        $log_prefix = "bugfixes";
        $print_mode = "wikibugs";
    } elsif ($arg eq '--bug-numbers') {
        $log_prefix = "bugnumbers";
        $print_mode = "bugnumbers";
    } elsif ($arg eq '--rev-list') {
        $git_command = "git rev-list --pretty=medium";
    } else {
        # the first free argument is the top directory;
        # the others are passed on to the git command
        if (! defined $top_dir) {
            $top_dir=$arg;
        } else {
            push @git_args, $arg;
        }
    }
}

$git_command .= " " . join (' ', @git_args) if (@git_args);

(defined $top_dir) || die "Error: top directory is not defined\n";
(-d "$top_dir") || die "Error: not a directory: $top_dir\n";
(-f "$top_dir/.git/config") || die "Error: can't find $top_dir/.git/config\n";
(!defined $log_dir) || (-d $log_dir) || die "Error: directory does not exist: $log_dir\n";
(defined $log_suffix) || die "Error: define log suffix using --log-suffix=<string>\n";

$branch_name = get_branch_name($top_dir);

load_data(\%data, $top_dir, $piece, $branch_name, $git_command);

$log = open_log_file($log_dir, $log_prefix, $log_suffix, $top_dir, $branch_name);
if ( $print_mode eq "bugs" ) {
    # identity-transform bug ids
    print_bugs(\%data, $log, sub { return $_[0] } );
} elsif ( $print_mode eq "wikibugs" ) {
    # wiki-ize bug ids
    print_bugs(\%data, $log, sub { $_[0] =~ s/(.*)\#(.*)/* {{$1|$2}}/; return $_[0] });
} elsif ( $print_mode eq "bugnumbers" ) {
    print_bugnumbers(\%data, $log);
} else {
    print_stat(\%data, $log);
}

# buffered write errors show up when the file is closed
close ($log) || die "Error: can't close the log file: $!\n";