diff options
author | Fathi Boudra <fathi.boudra@linaro.org> | 2014-06-02 17:14:02 +0300 |
---|---|---|
committer | Fathi Boudra <fathi.boudra@linaro.org> | 2014-06-02 17:14:02 +0300 |
commit | 46f29f243b1e7642e860b40214d5f893aad678e1 (patch) | |
tree | 7ac82cee0957299e51c36f22ce1b172670513348 /collectstats.pl |
Initial commit - Bugzilla 4.4.4
Signed-off-by: Fathi Boudra <fathi.boudra@linaro.org>
Diffstat (limited to 'collectstats.pl')
-rwxr-xr-x | collectstats.pl | 536 |
1 files changed, 536 insertions, 0 deletions
diff --git a/collectstats.pl b/collectstats.pl new file mode 100755 index 0000000..aa98ddf --- /dev/null +++ b/collectstats.pl @@ -0,0 +1,536 @@ +#!/usr/bin/perl -w +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# This Source Code Form is "Incompatible With Secondary Licenses", as +# defined by the Mozilla Public License, v. 2.0. + +use strict; +use lib qw(. lib); + +use Getopt::Long qw(:config bundling); +use Pod::Usage; +use List::Util qw(first); +use Cwd; + +use Bugzilla; +use Bugzilla::Constants; +use Bugzilla::Error; +use Bugzilla::Util; +use Bugzilla::Search; +use Bugzilla::User; +use Bugzilla::Product; +use Bugzilla::Field; +use Bugzilla::Install::Filesystem qw(fix_dir_permissions); + +my %switch; +GetOptions(\%switch, 'help|h', 'regenerate'); + +# Print the help message if that switch was selected. +pod2usage({-verbose => 1, -exitval => 1}) if $switch{'help'}; + +# Turn off output buffering (probably needed when displaying output feedback +# in the regenerate mode). +$| = 1; + +my $datadir = bz_locations()->{'datadir'}; +my $graphsdir = bz_locations()->{'graphsdir'}; + +# Tidy up after graphing module +my $cwd = Cwd::getcwd(); +if (chdir($graphsdir)) { + unlink <./*.gif>; + unlink <./*.png>; + # chdir("..") doesn't work if graphs is a symlink, see bug 429378 + chdir($cwd); +} + +my $dbh = Bugzilla->switch_to_shadow_db(); + +# As we can now customize statuses and resolutions, looking at the current list +# of legal values only is not enough as some now removed statuses and resolutions +# may have existed in the past, or have been renamed. We want them all. +my $fields = {}; +foreach my $field ('bug_status', 'resolution') { + my $values = get_legal_field_values($field); + my $old_values = $dbh->selectcol_arrayref( + "SELECT bugs_activity.added + FROM bugs_activity + INNER JOIN fielddefs + ON fielddefs.id = bugs_activity.fieldid + LEFT JOIN $field + ON $field.value = bugs_activity.added + WHERE fielddefs.name = ? + AND $field.id IS NULL + + UNION + + SELECT bugs_activity.removed + FROM bugs_activity + INNER JOIN fielddefs + ON fielddefs.id = bugs_activity.fieldid + LEFT JOIN $field + ON $field.value = bugs_activity.removed + WHERE fielddefs.name = ? + AND $field.id IS NULL", + undef, ($field, $field)); + + push(@$values, @$old_values); + $fields->{$field} = $values; +} + +my @statuses = @{$fields->{'bug_status'}}; +my @resolutions = @{$fields->{'resolution'}}; +# Exclude "" from the resolution list. +@resolutions = grep {$_} @resolutions; + +# --regenerate was taking an enormous amount of time to query everything +# per bug, per day. Instead, we now just get all the data out of the DB +# at once and stuff it into some data structures. +my (%bug_status, %bug_resolution, %removed); +if ($switch{'regenerate'}) { + %bug_resolution = @{ $dbh->selectcol_arrayref( + 'SELECT bug_id, resolution FROM bugs', {Columns=>[1,2]}) }; + %bug_status = @{ $dbh->selectcol_arrayref( + 'SELECT bug_id, bug_status FROM bugs', {Columns=>[1,2]}) }; + + my $removed_sth = $dbh->prepare( + q{SELECT bugs_activity.bug_id, bugs_activity.removed,} + . $dbh->sql_to_days('bugs_activity.bug_when') + . q{ FROM bugs_activity + WHERE bugs_activity.fieldid = ? + ORDER BY bugs_activity.bug_when}); + + %removed = (bug_status => {}, resolution => {}); + foreach my $field (qw(bug_status resolution)) { + my $field_id = Bugzilla::Field->check($field)->id; + my $rows = $dbh->selectall_arrayref($removed_sth, undef, $field_id); + my $hash = $removed{$field}; + foreach my $row (@$rows) { + my ($bug_id, $removed, $when) = @$row; + $hash->{$bug_id} ||= []; + push(@{ $hash->{$bug_id} }, { when => int($when), + removed => $removed }); + } + } +} + +my $tstart = time; + +my @myproducts = Bugzilla::Product->get_all; +unshift(@myproducts, "-All-"); + +my $dir = "$datadir/mining"; +if (!-d $dir) { + mkdir $dir or die "mkdir $dir failed: $!"; + fix_dir_permissions($dir); +} + +foreach (@myproducts) { + if ($switch{'regenerate'}) { + regenerate_stats($dir, $_, \%bug_resolution, \%bug_status, \%removed); + } else { + &collect_stats($dir, $_); + } +} +# Fix permissions for all files in mining/. +fix_dir_permissions($dir); + +my $tend = time; +# Uncomment the following line for performance testing. +#say "Total time taken " . delta_time($tstart, $tend); + +CollectSeriesData(); + +sub collect_stats { + my $dir = shift; + my $product = shift; + my $when = localtime (time); + my $dbh = Bugzilla->dbh; + my $product_id; + + if (ref $product) { + $product_id = $product->id; + $product = $product->name; + } + + # NB: Need to mangle the product for the filename, but use the real + # product name in the query + my $file_product = $product; + $file_product =~ s/\//-/gs; + my $file = join '/', $dir, $file_product; + my $exists = -f $file; + + # if the file exists, get the old status and resolution list for that product. + my @data; + @data = get_old_data($file) if $exists; + + # If @data is not empty, then we have to recreate the data file. + if (scalar(@data)) { + open(DATA, '>', $file) + || ThrowCodeError('chart_file_open_fail', {'filename' => $file}); + } + else { + open(DATA, '>>', $file) + || ThrowCodeError('chart_file_open_fail', {'filename' => $file}); + } + + if (Bugzilla->params->{'utf8'}) { + binmode DATA, ':utf8'; + } + + # Now collect current data. + my @row = (today()); + my $status_sql = q{SELECT COUNT(*) FROM bugs WHERE bug_status = ?}; + my $reso_sql = q{SELECT COUNT(*) FROM bugs WHERE resolution = ?}; + + if ($product ne '-All-') { + $status_sql .= q{ AND product_id = ?}; + $reso_sql .= q{ AND product_id = ?}; + } + + my $sth_status = $dbh->prepare($status_sql); + my $sth_reso = $dbh->prepare($reso_sql); + + my @values ; + foreach my $status (@statuses) { + @values = ($status); + push (@values, $product_id) if ($product ne '-All-'); + my $count = $dbh->selectrow_array($sth_status, undef, @values); + push(@row, $count); + } + foreach my $resolution (@resolutions) { + @values = ($resolution); + push (@values, $product_id) if ($product ne '-All-'); + my $count = $dbh->selectrow_array($sth_reso, undef, @values); + push(@row, $count); + } + + if (!$exists || scalar(@data)) { + my $fields = join('|', ('DATE', @statuses, @resolutions)); + print DATA <<FIN; +# Bugzilla Daily Bug Stats +# +# Do not edit me! This file is generated. +# +# fields: $fields +# Product: $product +# Created: $when +FIN + } + + # Add existing data, if needed. Note that no count is not treated + # the same way as a count with 0 bug. + foreach my $data (@data) { + print DATA join('|', map {defined $data->{$_} ? $data->{$_} : ''} + ('DATE', @statuses, @resolutions)) . "\n"; + } + print DATA (join '|', @row) . "\n"; + close DATA; +} + +sub get_old_data { + my $file = shift; + + open(DATA, '<', $file) + || ThrowCodeError('chart_file_open_fail', {'filename' => $file}); + + if (Bugzilla->params->{'utf8'}) { + binmode DATA, ':utf8'; + } + + my @data; + my @columns; + my $recreate = 0; + while (<DATA>) { + chomp; + next unless $_; + if (/^# fields?:\s*(.+)\s*$/) { + @columns = split(/\|/, $1); + # Compare this list with @statuses and @resolutions. + # If they are identical, then we can safely append new data + # to the end of the file; else we have to recreate it. + $recreate = 1; + my @new_cols = ($columns[0], @statuses, @resolutions); + if (scalar(@columns) == scalar(@new_cols)) { + my $identical = 1; + for (0 .. $#columns) { + $identical = 0 if ($columns[$_] ne $new_cols[$_]); + } + last if $identical; + } + } + next unless $recreate; + next if (/^#/); # Ignore comments. + # If we have to recreate the file, we have to load all existing + # data first. + my @line = split /\|/; + my %data; + foreach my $column (@columns) { + $data{$column} = shift @line; + } + push(@data, \%data); + } + close(DATA); + return @data; +} + +# This regenerates all statistics from the database. +sub regenerate_stats { + my ($dir, $product, $bug_resolution, $bug_status, $removed) = @_; + + my $dbh = Bugzilla->dbh; + my $when = localtime(time()); + my $tstart = time(); + + # NB: Need to mangle the product for the filename, but use the real + # product name in the query + if (ref $product) { + $product = $product->name; + } + my $file_product = $product; + $file_product =~ s/\//-/gs; + my $file = join '/', $dir, $file_product; + + my $and_product = ""; + my $from_product = ""; + + my @values = (); + if ($product ne '-All-') { + $and_product = q{ AND products.name = ?}; + $from_product = q{ INNER JOIN products + ON bugs.product_id = products.id}; + push (@values, $product); + } + + # Determine the start date from the date the first bug in the + # database was created, and the end date from the current day. + # If there were no bugs in the search, return early. + my $query = q{SELECT } . + $dbh->sql_to_days('creation_ts') . q{ AS start_day, } . + $dbh->sql_to_days('current_date') . q{ AS end_day, } . + $dbh->sql_to_days("'1970-01-01'") . + qq{ FROM bugs $from_product + WHERE } . $dbh->sql_to_days('creation_ts') . + qq{ IS NOT NULL $and_product + ORDER BY start_day } . $dbh->sql_limit(1); + my ($start, $end, $base) = $dbh->selectrow_array($query, undef, @values); + + if (!defined $start) { + return; + } + + if (open DATA, ">$file") { + my $fields = join('|', ('DATE', @statuses, @resolutions)); + print DATA <<FIN; +# Bugzilla Daily Bug Stats +# +# Do not edit me! This file is generated. +# +# fields: $fields +# Product: $product +# Created: $when +FIN + # For each day, generate a line of statistics. + my $total_days = $end - $start; + my @bugs; + for (my $day = $start + 1; $day <= $end; $day++) { + # Some output feedback + my $percent_done = ($day - $start - 1) * 100 / $total_days; + printf "\rRegenerating $product \[\%.1f\%\%]", $percent_done; + + # Get a list of bugs that were created the previous day, and + # add those bugs to the list of bugs for this product. + $query = qq{SELECT bug_id + FROM bugs $from_product + WHERE bugs.creation_ts < } . + $dbh->sql_from_days($day - 1) . + q{ AND bugs.creation_ts >= } . + $dbh->sql_from_days($day - 2) . + $and_product . q{ ORDER BY bug_id}; + + my $bug_ids = $dbh->selectcol_arrayref($query, undef, @values); + push(@bugs, @$bug_ids); + + my %bugcount; + foreach (@statuses) { $bugcount{$_} = 0; } + foreach (@resolutions) { $bugcount{$_} = 0; } + # Get information on bug states and resolutions. + for my $bug (@bugs) { + my $status = _get_value( + $removed->{'bug_status'}->{$bug}, + $bug_status, $day, $bug); + + if (defined $bugcount{$status}) { + $bugcount{$status}++; + } + + my $resolution = _get_value( + $removed->{'resolution'}->{$bug}, + $bug_resolution, $day, $bug); + + if (defined $bugcount{$resolution}) { + $bugcount{$resolution}++; + } + } + + # Generate a line of output containing the date and counts + # of bugs in each state. + my $date = sqlday($day, $base); + print DATA "$date"; + foreach (@statuses) { print DATA "|$bugcount{$_}"; } + foreach (@resolutions) { print DATA "|$bugcount{$_}"; } + print DATA "\n"; + } + + # Finish up output feedback for this product. + my $tend = time; + say "\rRegenerating $product \[100.0\%] - " . delta_time($tstart, $tend); + + close DATA; + } +} + +# A helper for --regenerate. +# For each bug that exists on a day, we determine its status/resolution +# at the beginning of the day. If there were no status/resolution +# changes on or after that day, the status was the same as it +# is today (the "current" value). Otherwise, the status was equal to the +# first "previous value" entry in the bugs_activity table for that +# bug made on or after that day. +sub _get_value { + my ($removed, $current, $day, $bug) = @_; + + # Get the first change that's on or after this day. + my $item = first { $_->{when} >= $day } @{ $removed || [] }; + + # If there's no change on or after this day, then we just return the + # current value. + return $item ? $item->{removed} : $current->{$bug}; +} + +sub today { + my ($dom, $mon, $year) = (localtime(time))[3, 4, 5]; + return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom; +} + +sub today_dash { + my ($dom, $mon, $year) = (localtime(time))[3, 4, 5]; + return sprintf "%04d-%02d-%02d", 1900 + $year, ++$mon, $dom; +} + +sub sqlday { + my ($day, $base) = @_; + $day = ($day - $base) * 86400; + my ($dom, $mon, $year) = (gmtime($day))[3, 4, 5]; + return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom; +} + +sub delta_time { + my $tstart = shift; + my $tend = shift; + my $delta = $tend - $tstart; + my $hours = int($delta/3600); + my $minutes = int($delta/60) - ($hours * 60); + my $seconds = $delta - ($minutes * 60) - ($hours * 3600); + return sprintf("%02d:%02d:%02d" , $hours, $minutes, $seconds); +} + +sub CollectSeriesData { + # We need some way of randomising the distribution of series, such that + # all of the series which are to be run every 7 days don't run on the same + # day. This is because this might put the server under severe load if a + # particular frequency, such as once a week, is very common. We achieve + # this by only running queries when: + # (days_since_epoch + series_id) % frequency = 0. So they'll run every + # <frequency> days, but the start date depends on the series_id. + my $days_since_epoch = int(time() / (60 * 60 * 24)); + my $today = today_dash(); + + # We save a copy of the main $dbh and then switch to the shadow and get + # that one too. Remember, these may be the same. + my $dbh = Bugzilla->switch_to_main_db(); + my $shadow_dbh = Bugzilla->switch_to_shadow_db(); + + my $serieses = $dbh->selectall_hashref("SELECT series_id, query, creator " . + "FROM series " . + "WHERE frequency != 0 AND " . + "MOD(($days_since_epoch + series_id), frequency) = 0", + "series_id"); + + # We prepare the insertion into the data table, for efficiency. + my $sth = $dbh->prepare("INSERT INTO series_data " . + "(series_id, series_date, series_value) " . + "VALUES (?, " . $dbh->quote($today) . ", ?)"); + + # We delete from the table beforehand, to avoid SQL errors if people run + # collectstats.pl twice on the same day. + my $deletesth = $dbh->prepare("DELETE FROM series_data + WHERE series_id = ? AND series_date = " . + $dbh->quote($today)); + + foreach my $series_id (keys %$serieses) { + # We set up the user for Search.pm's permission checking - each series + # runs with the permissions of its creator. + my $user = new Bugzilla::User($serieses->{$series_id}->{'creator'}); + my $cgi = new Bugzilla::CGI($serieses->{$series_id}->{'query'}); + my $data; + + # Do not die if Search->new() detects invalid data, such as an obsolete + # login name or a renamed product or component, etc. + eval { + my $search = new Bugzilla::Search('params' => scalar $cgi->Vars, + 'fields' => ["bug_id"], + 'allow_unlimited' => 1, + 'user' => $user); + $data = $search->data; + }; + + if (!$@) { + # We need to count the returned rows. Without subselects, we can't + # do this directly in the SQL for all queries. So we do it by hand. + my $count = scalar(@$data) || 0; + + $deletesth->execute($series_id); + $sth->execute($series_id, $count); + } + } +} + +__END__ + +=head1 NAME + +collectstats.pl - Collect data about Bugzilla bugs. + +=head1 SYNOPSIS + + ./collectstats.pl [--regenerate] [--help] + +Collects data about bugs to be used in Old and New Charts. + +=head1 OPTIONS + +=over + +=item B<--help> + +Print this help page. + +=item B<--regenerate> + +Recreate all the data about bugs, from day 1. This option is only relevant +for Old Charts, and has no effect for New Charts. +This option will overwrite all existing collected data and can take a huge +amount of time. You normally don't need to use this option (do not use it +in a cron job). + +=back + +=head1 DESCRIPTION + +This script collects data about all bugs for Old Charts, triaged by product +and by bug status and resolution. It also collects data for New Charts, based +on existing series. For New Charts, data is only collected once a series is +defined; this script cannot recreate data prior to this date. |