Initial commit - Bugzilla 4.4.4

Signed-off-by: Fathi Boudra <fathi.boudra@linaro.org>
author: Fathi Boudra <fathi.boudra@linaro.org> 2014-06-02 17:14:02 +0300
committer: Fathi Boudra <fathi.boudra@linaro.org> 2014-06-02 17:14:02 +0300
commit: 46f29f243b1e7642e860b40214d5f893aad678e1 (patch)
tree: 7ac82cee0957299e51c36f22ce1b172670513348 /collectstats.pl
1 files changed, 536 insertions, 0 deletions
diff --git a/collectstats.pl b/collectstats.pl
new file mode 100755
index 0000000..aa98ddf
--- /dev/null
+++ b/collectstats.pl
@@ -0,0 +1,536 @@
+#!/usr/bin/perl -w
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# This Source Code Form is "Incompatible With Secondary Licenses", as
+# defined by the Mozilla Public License, v. 2.0.
+
+use strict;
+use lib qw(. lib);
+
+use Getopt::Long qw(:config bundling);
+use Pod::Usage;
+use List::Util qw(first);
+use Cwd;
+
+use Bugzilla;
+use Bugzilla::Constants;
+use Bugzilla::Error;
+use Bugzilla::Util;
+use Bugzilla::Search;
+use Bugzilla::User;
+use Bugzilla::Product;
+use Bugzilla::Field;
+use Bugzilla::Install::Filesystem qw(fix_dir_permissions);
+
+my %switch;
+GetOptions(\%switch, 'help|h', 'regenerate');
+
+# Print the help message if that switch was selected.
+pod2usage({-verbose => 1, -exitval => 1}) if $switch{'help'};
+
+# Turn off output buffering (probably needed when displaying output feedback
+# in the regenerate mode).
+$| = 1;
+
+my $datadir = bz_locations()->{'datadir'};
+my $graphsdir = bz_locations()->{'graphsdir'};
+
+# Tidy up after graphing module
+my $cwd = Cwd::getcwd();
+if (chdir($graphsdir)) {
+    unlink <./*.gif>;
+    unlink <./*.png>;
+    # chdir("..") doesn't work if graphs is a symlink, see bug 429378
+    chdir($cwd);
+}
+
+my $dbh = Bugzilla->switch_to_shadow_db();
+
+# As we can now customize statuses and resolutions, looking at the current list
+# of legal values only is not enough as some now removed statuses and resolutions
+# may have existed in the past, or have been renamed. We want them all.
+my $fields = {};
+foreach my $field ('bug_status', 'resolution') {
+    my $values = get_legal_field_values($field);
+    my $old_values = $dbh->selectcol_arrayref(
+                             "SELECT bugs_activity.added
+                                FROM bugs_activity
+                          INNER JOIN fielddefs
+                                  ON fielddefs.id = bugs_activity.fieldid
+                           LEFT JOIN $field
+                                  ON $field.value = bugs_activity.added
+                               WHERE fielddefs.name = ?
+                                 AND $field.id IS NULL
+
+                               UNION
+
+                              SELECT bugs_activity.removed
+                                FROM bugs_activity
+                          INNER JOIN fielddefs
+                                  ON fielddefs.id = bugs_activity.fieldid
+                           LEFT JOIN $field
+                                  ON $field.value = bugs_activity.removed
+                               WHERE fielddefs.name = ?
+                                 AND $field.id IS NULL",
+                               undef, ($field, $field));
+
+    push(@$values, @$old_values);
+    $fields->{$field} = $values;
+}
+
+my @statuses = @{$fields->{'bug_status'}};
+my @resolutions = @{$fields->{'resolution'}};
+# Exclude "" from the resolution list.
+@resolutions = grep {$_} @resolutions;
+
+# --regenerate was taking an enormous amount of time to query everything
+# per bug, per day. Instead, we now just get all the data out of the DB
+# at once and stuff it into some data structures.
+my (%bug_status, %bug_resolution, %removed);
+if ($switch{'regenerate'}) {
+    %bug_resolution = @{ $dbh->selectcol_arrayref(
+        'SELECT bug_id, resolution FROM bugs', {Columns=>[1,2]}) };
+    %bug_status = @{ $dbh->selectcol_arrayref(
+        'SELECT bug_id, bug_status FROM bugs', {Columns=>[1,2]}) };
+
+    my $removed_sth = $dbh->prepare(
+        q{SELECT bugs_activity.bug_id, bugs_activity.removed,}
+        . $dbh->sql_to_days('bugs_activity.bug_when')
+       . q{ FROM bugs_activity
+           WHERE bugs_activity.fieldid = ?
+        ORDER BY bugs_activity.bug_when});
+
+    %removed = (bug_status => {}, resolution => {});
+    foreach my $field (qw(bug_status resolution)) {
+        my $field_id = Bugzilla::Field->check($field)->id;
+        my $rows = $dbh->selectall_arrayref($removed_sth, undef, $field_id);
+        my $hash = $removed{$field};
+        foreach my $row (@$rows) {
+            my ($bug_id, $removed, $when) = @$row;
+            $hash->{$bug_id} ||= [];
+            push(@{ $hash->{$bug_id} }, { when    => int($when),
+                                          removed => $removed });
+        }
+    }
+}
+
+my $tstart = time;
+
+my @myproducts = Bugzilla::Product->get_all;
+unshift(@myproducts, "-All-");
+
+my $dir = "$datadir/mining";
+if (!-d $dir) {
+    mkdir $dir or die "mkdir $dir failed: $!";
+    fix_dir_permissions($dir);
+}
+
+foreach (@myproducts) {
+    if ($switch{'regenerate'}) {
+        regenerate_stats($dir, $_, \%bug_resolution, \%bug_status, \%removed);
+    } else {
+        &collect_stats($dir, $_);
+    }
+}
+# Fix permissions for all files in mining/.
+fix_dir_permissions($dir);
+
+my $tend = time;
+# Uncomment the following line for performance testing.
+#say "Total time taken " . delta_time($tstart, $tend);
+
+CollectSeriesData();
+
+sub collect_stats {
+    my $dir = shift;
+    my $product = shift;
+    my $when = localtime (time);
+    my $dbh = Bugzilla->dbh;
+    my $product_id;
+
+    if (ref $product) {
+        $product_id = $product->id;
+        $product = $product->name;
+    }
+
+    # NB: Need to mangle the product for the filename, but use the real
+    # product name in the query
+    my $file_product = $product;
+    $file_product =~ s/\//-/gs;
+    my $file = join '/', $dir, $file_product;
+    my $exists = -f $file;
+
+    # if the file exists, get the old status and resolution list for that product.
+    my @data;
+    @data = get_old_data($file) if $exists;
+
+    # If @data is not empty, then we have to recreate the data file.
+    if (scalar(@data)) {
+        open(DATA, '>', $file)
+          || ThrowCodeError('chart_file_open_fail', {'filename' => $file});
+    }
+    else {
+        open(DATA, '>>', $file)
+          || ThrowCodeError('chart_file_open_fail', {'filename' => $file});
+    }
+
+    if (Bugzilla->params->{'utf8'}) {
+        binmode DATA, ':utf8';
+    }
+
+    # Now collect current data.
+    my @row = (today());
+    my $status_sql = q{SELECT COUNT(*) FROM bugs WHERE bug_status = ?};
+    my $reso_sql   = q{SELECT COUNT(*) FROM bugs WHERE resolution = ?};
+
+    if ($product ne '-All-') {
+        $status_sql .= q{ AND product_id = ?};
+        $reso_sql   .= q{ AND product_id = ?};
+    }
+
+    my $sth_status = $dbh->prepare($status_sql);
+    my $sth_reso   = $dbh->prepare($reso_sql);
+
+    my @values ;
+    foreach my $status (@statuses) {
+        @values = ($status);
+        push (@values, $product_id) if ($product ne '-All-');
+        my $count = $dbh->selectrow_array($sth_status, undef, @values);
+        push(@row, $count);
+    }
+    foreach my $resolution (@resolutions) {
+        @values = ($resolution);
+        push (@values, $product_id) if ($product ne '-All-');
+        my $count = $dbh->selectrow_array($sth_reso, undef, @values);
+        push(@row, $count);
+    }
+
+    if (!$exists || scalar(@data)) {
+        my $fields = join('|', ('DATE', @statuses, @resolutions));
+        print DATA <<FIN;
+# Bugzilla Daily Bug Stats
+#
+# Do not edit me! This file is generated.
+#
+# fields: $fields
+# Product: $product
+# Created: $when
+FIN
+    }
+
+    # Add existing data, if needed. Note that no count is not treated
+    # the same way as a count with 0 bug.
+    foreach my $data (@data) {
+        print DATA join('|', map {defined $data->{$_} ? $data->{$_} : ''}
+                                 ('DATE', @statuses, @resolutions)) . "\n";
+    }
+    print DATA (join '|', @row) . "\n";
+    close DATA;
+}
+
+sub get_old_data {
+    my $file = shift;
+
+    open(DATA, '<', $file)
+      || ThrowCodeError('chart_file_open_fail', {'filename' => $file});
+
+    if (Bugzilla->params->{'utf8'}) {
+        binmode DATA, ':utf8';
+    }
+
+    my @data;
+    my @columns;
+    my $recreate = 0;
+    while (<DATA>) {
+        chomp;
+        next unless $_;
+        if (/^# fields?:\s*(.+)\s*$/) {
+            @columns = split(/\|/, $1);
+            # Compare this list with @statuses and @resolutions.
+            # If they are identical, then we can safely append new data
+            # to the end of the file; else we have to recreate it.
+            $recreate = 1;
+            my @new_cols = ($columns[0], @statuses, @resolutions);
+            if (scalar(@columns) == scalar(@new_cols)) {
+                my $identical = 1;
+                for (0 .. $#columns) {
+                    $identical = 0 if ($columns[$_] ne $new_cols[$_]);
+                }
+                last if $identical;
+            }
+        }
+        next unless $recreate;
+        next if (/^#/); # Ignore comments.
+        # If we have to recreate the file, we have to load all existing
+        # data first.
+        my @line = split /\|/;
+        my %data;
+        foreach my $column (@columns) {
+            $data{$column} = shift @line;
+        }
+        push(@data, \%data);
+    }
+    close(DATA);
+    return @data;
+}
+
+# This regenerates all statistics from the database.
+sub regenerate_stats {
+    my ($dir, $product, $bug_resolution, $bug_status, $removed) = @_;
+
+    my $dbh = Bugzilla->dbh;
+    my $when = localtime(time());
+    my $tstart = time();
+
+    # NB: Need to mangle the product for the filename, but use the real
+    # product name in the query
+    if (ref $product) {
+        $product = $product->name;
+    }
+    my $file_product = $product;
+    $file_product =~ s/\//-/gs;
+    my $file = join '/', $dir, $file_product;
+
+    my $and_product = "";
+    my $from_product = "";
+
+    my @values = ();
+    if ($product ne '-All-') {
+        $and_product = q{ AND products.name = ?};
+        $from_product = q{ INNER JOIN products 
+                          ON bugs.product_id = products.id};
+        push (@values, $product);
+    }
+
+    # Determine the start date from the date the first bug in the
+    # database was created, and the end date from the current day.
+    # If there were no bugs in the search, return early.
+    my $query = q{SELECT } .
+                $dbh->sql_to_days('creation_ts') . q{ AS start_day, } .
+                $dbh->sql_to_days('current_date') . q{ AS end_day, } . 
+                $dbh->sql_to_days("'1970-01-01'") . 
+                 qq{ FROM bugs $from_product 
+                   WHERE } . $dbh->sql_to_days('creation_ts') . 
+                         qq{ IS NOT NULL $and_product 
+                ORDER BY start_day } . $dbh->sql_limit(1);
+    my ($start, $end, $base) = $dbh->selectrow_array($query, undef, @values);
+
+    if (!defined $start) {
+        return;
+    }
+
+    if (open DATA, ">$file") {
+        my $fields = join('|', ('DATE', @statuses, @resolutions));
+        print DATA <<FIN;
+# Bugzilla Daily Bug Stats
+#
+# Do not edit me! This file is generated.
+#
+# fields: $fields
+# Product: $product
+# Created: $when
+FIN
+        # For each day, generate a line of statistics.
+        my $total_days = $end - $start;
+        my @bugs;
+        for (my $day = $start + 1; $day <= $end; $day++) {
+            # Some output feedback
+            my $percent_done = ($day - $start - 1) * 100 / $total_days;
+            printf "\rRegenerating $product \[\%.1f\%\%]", $percent_done;
+
+            # Get a list of bugs that were created the previous day, and
+            # add those bugs to the list of bugs for this product.
+            $query = qq{SELECT bug_id 
+                          FROM bugs $from_product 
+                         WHERE bugs.creation_ts < } . 
+                         $dbh->sql_from_days($day - 1) . 
+                         q{ AND bugs.creation_ts >= } . 
+                         $dbh->sql_from_days($day - 2) . 
+                        $and_product . q{ ORDER BY bug_id};
+
+            my $bug_ids = $dbh->selectcol_arrayref($query, undef, @values);
+            push(@bugs, @$bug_ids);
+
+            my %bugcount;
+            foreach (@statuses) { $bugcount{$_} = 0; }
+            foreach (@resolutions) { $bugcount{$_} = 0; }
+            # Get information on bug states and resolutions.
+            for my $bug (@bugs) {
+                my $status = _get_value(
+                    $removed->{'bug_status'}->{$bug},
+                    $bug_status,  $day, $bug);
+
+                if (defined $bugcount{$status}) {
+                    $bugcount{$status}++;
+                }
+
+                my $resolution = _get_value(
+                    $removed->{'resolution'}->{$bug},
+                    $bug_resolution, $day, $bug);
+
+                if (defined $bugcount{$resolution}) {
+                    $bugcount{$resolution}++;
+                }
+            }
+
+            # Generate a line of output containing the date and counts
+            # of bugs in each state.
+            my $date = sqlday($day, $base);
+            print DATA "$date";
+            foreach (@statuses) { print DATA "|$bugcount{$_}"; }
+            foreach (@resolutions) { print DATA "|$bugcount{$_}"; }
+            print DATA "\n";
+        }
+
+        # Finish up output feedback for this product.
+        my $tend = time;
+        say "\rRegenerating $product \[100.0\%] - " . delta_time($tstart, $tend);
+
+        close DATA;
+    }
+}
+
+# A helper for --regenerate.
+# For each bug that exists on a day, we determine its status/resolution
+# at the beginning of the day.  If there were no status/resolution
+# changes on or after that day, the status was the same as it
+# is today (the "current" value).  Otherwise, the status was equal to the
+# first "previous value" entry in the bugs_activity table for that 
+# bug made on or after that day.
+sub _get_value {
+    my ($removed, $current, $day, $bug) = @_;
+
+    # Get the first change that's on or after this day.
+    my $item = first { $_->{when} >= $day } @{ $removed || [] };
+
+    # If there's no change on or after this day, then we just return the
+    # current value.
+    return $item ? $item->{removed} : $current->{$bug};
+}
+
+sub today {
+    my ($dom, $mon, $year) = (localtime(time))[3, 4, 5];
+    return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom;
+}
+
+sub today_dash {
+    my ($dom, $mon, $year) = (localtime(time))[3, 4, 5];
+    return sprintf "%04d-%02d-%02d", 1900 + $year, ++$mon, $dom;
+}
+
+sub sqlday {
+    my ($day, $base) = @_;
+    $day = ($day - $base) * 86400;
+    my ($dom, $mon, $year) = (gmtime($day))[3, 4, 5];
+    return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom;
+}
+
+sub delta_time {
+    my $tstart = shift;
+    my $tend = shift;
+    my $delta = $tend - $tstart;
+    my $hours = int($delta/3600);
+    my $minutes = int($delta/60) - ($hours * 60);
+    my $seconds = $delta - ($minutes * 60) - ($hours * 3600);
+    return sprintf("%02d:%02d:%02d" , $hours, $minutes, $seconds);
+}
+
+sub CollectSeriesData {
+    # We need some way of randomising the distribution of series, such that
+    # all of the series which are to be run every 7 days don't run on the same
+    # day. This is because this might put the server under severe load if a
+    # particular frequency, such as once a week, is very common. We achieve
+    # this by only running queries when:
+    # (days_since_epoch + series_id) % frequency = 0. So they'll run every
+    # <frequency> days, but the start date depends on the series_id.
+    my $days_since_epoch = int(time() / (60 * 60 * 24));
+    my $today = today_dash();
+
+    # We save a copy of the main $dbh and then switch to the shadow and get
+    # that one too. Remember, these may be the same.
+    my $dbh = Bugzilla->switch_to_main_db();
+    my $shadow_dbh = Bugzilla->switch_to_shadow_db();
+    
+    my $serieses = $dbh->selectall_hashref("SELECT series_id, query, creator " .
+                      "FROM series " .
+                      "WHERE frequency != 0 AND " . 
+                      "MOD(($days_since_epoch + series_id), frequency) = 0",
+                      "series_id");
+
+    # We prepare the insertion into the data table, for efficiency.
+    my $sth = $dbh->prepare("INSERT INTO series_data " .
+                            "(series_id, series_date, series_value) " .
+                            "VALUES (?, " . $dbh->quote($today) . ", ?)");
+
+    # We delete from the table beforehand, to avoid SQL errors if people run
+    # collectstats.pl twice on the same day.
+    my $deletesth = $dbh->prepare("DELETE FROM series_data 
+                                   WHERE series_id = ? AND series_date = " .
+                                   $dbh->quote($today));
+                                     
+    foreach my $series_id (keys %$serieses) {
+        # We set up the user for Search.pm's permission checking - each series
+        # runs with the permissions of its creator.
+        my $user = new Bugzilla::User($serieses->{$series_id}->{'creator'});
+        my $cgi = new Bugzilla::CGI($serieses->{$series_id}->{'query'});
+        my $data;
+
+        # Do not die if Search->new() detects invalid data, such as an obsolete
+        # login name or a renamed product or component, etc.
+        eval {
+            my $search = new Bugzilla::Search('params' => scalar $cgi->Vars,
+                                              'fields' => ["bug_id"],
+                                              'allow_unlimited' => 1,
+                                              'user'   => $user);
+            $data = $search->data;
+        };
+
+        if (!$@) {
+            # We need to count the returned rows. Without subselects, we can't
+            # do this directly in the SQL for all queries. So we do it by hand.
+            my $count = scalar(@$data) || 0;
+
+            $deletesth->execute($series_id);
+            $sth->execute($series_id, $count);
+        }
+    }
+}
+
+__END__
+
+=head1 NAME
+
+collectstats.pl - Collect data about Bugzilla bugs.
+
+=head1 SYNOPSIS
+
+ ./collectstats.pl [--regenerate] [--help]
+
+Collects data about bugs to be used in Old and New Charts.
+
+=head1 OPTIONS
+
+=over
+
+=item B<--help>
+
+Print this help page.
+
+=item B<--regenerate>
+
+Recreate all the data about bugs, from day 1. This option is only relevant
+for Old Charts, and has no effect for New Charts.
+This option will overwrite all existing collected data and can take a huge
+amount of time. You normally don't need to use this option (do not use it
+in a cron job).
+
+=back
+
+=head1 DESCRIPTION
+
+This script collects data about all bugs for Old Charts, triaged by product
+and by bug status and resolution. It also collects data for New Charts, based
+on existing series. For New Charts, data is only collected once a series is
+defined; this script cannot recreate data prior to this date.
author	Fathi Boudra <fathi.boudra@linaro.org>	2014-06-02 17:14:02 +0300
committer	Fathi Boudra <fathi.boudra@linaro.org>	2014-06-02 17:14:02 +0300
commit	46f29f243b1e7642e860b40214d5f893aad678e1 (patch)
tree	7ac82cee0957299e51c36f22ce1b172670513348 /collectstats.pl