#!/bin/bash
#
# growth.sh
#
# Copyright (C) 2015 Linaro, Ltd
# Andy Green
# Licensed under GPL2.1
#
# Please run the script with no args to get comprehensive help
#
# Note on sqlite3 usage
#
# The sqlite3 db generated here is just caching analysis the script
# generated itself.  You can delete it and the script will recreate an
# empty one automatically, but you will have to regenerate the runs
# that were stored in it.
#
# Having the data cached there is helpful both in making complex queries
# that are difficult to reproduce using cut, sed, sort etc and in allowing
# quick development of new graphs and queries without the cost of generating
# the data each time.
#
# External tools used: git, sqlite3, gnuplot, ImageMagick "convert".
# Plot mode must be run from the top of the analysed tree, because column
# discovery tests paths with [ -d ] against the working directory.

DB=growth.sq3
SCHEMA_VER=1

# Written by basis_point(): the commit where the studied ref left the
# basis branch.
BP=

# Historical leftovers.  Nothing below reads them; they are kept so the
# files the script touches in the cwd stay the same as before.
DIRCOL=0
rm -f .cols.tmp
touch .cols.tmp
rm -f .first-phase

# number of preset columns before dir ones
OFFSET_COLS=8

# Print a message on stderr and stop the script.
die()
{
    >&2 printf '%s\n' "$*"
    exit 1
}

# Run one SQL statement against $DB, printing any result rows on stdout.
# $1: SQL text
#
# Diagnostics go to stderr so that callers capturing the output with
# $( ) never mistake an error message for data.  Note that the exit only
# leaves the current (sub)shell when sq3 is used inside $( ) or a pipe.
sq3()
{
    if ! sqlite3 "$DB" "$1" ; then
        >&2 echo "sqlite error"
        >&2 printf '%s\n' "$1"
        exit 1
    fi
}

# Print $1 as a single-quoted SQL string literal, doubling any embedded
# single quote, so tag names and paths cannot break the statement.
sql_str()
{
    local q="'"

    printf "'%s'" "${1//$q/$q$q}"
}

# Find where a ref diverged from the basis branch; the result is left in
# the global BP (first commit common to both first-parent histories).
# $1: basis branch, $2: starting tree
basis_point()
{
    echo basis_point "$1" "$2"

    BP=$(diff -u <(git rev-list --first-parent "$2") \
                 <(git rev-list --first-parent "$1") |
        sed -ne 's/^ //p' | head -1)

    echo result "$BP"
}

# Record one snapshot (a ref compared against its basis point) plus one
# dir_summary row per changed, non-binary file.
# $1: basis branch, $2: comparison ref, $3: basis point if known
# $4: index in sequence, $5: run_key we are attached to
# Returns 1 (after a warning) when no basis point can be found.
make_stat()
{
    local CB BASIS COMP DS DS_BASIS STATS
    local FILES ADD REM SKEY F DEPTH slashes
    local re_files='([0-9]+) file'
    local re_add='([0-9]+) insertion'
    local re_rem='([0-9]+) deletion'

    CB="$2"
    if [ -z "$3" ] ; then
        basis_point "$1" "$CB"
    else
        BP=$3
    fi

    if [ -z "$BP" ] ; then
        >&2 echo "no basis point found for $CB against $1, skipping"
        return 1
    fi

    BASIS=$(git describe "$BP")
    COMP=$(git describe "$CB")
    DS=$(git log -n 1 "$CB" --format=format:%ct)
    DS_BASIS=$(git show "$BP" --format=format:%ct | head -n1)

    echo "git diff $BP..$CB --shortstat"

    # --shortstat leaves out the insertions or deletions clause when that
    # count is zero, and prints nothing at all for an empty diff, so pick
    # the numbers out by keyword and default each one to 0.
    STATS=$(git diff "$BP..$CB" --shortstat)
    FILES=0
    ADD=0
    REM=0
    if [[ $STATS =~ $re_files ]] ; then
        FILES=${BASH_REMATCH[1]}
    fi
    if [[ $STATS =~ $re_add ]] ; then
        ADD=${BASH_REMATCH[1]}
    fi
    if [[ $STATS =~ $re_rem ]] ; then
        REM=${BASH_REMATCH[1]}
    fi

    sq3 "insert into snapshots (run_idx, ref_name, ref_date, \
        basis_name, basis_date, files_changed, \
        loc_added, loc_removed) \
        values ($5, $(sql_str "$COMP"), $DS, $(sql_str "$BASIS"), \
        $DS_BASIS, $FILES, $ADD, $REM);"
    SKEY=$(sq3 "select seq from sqlite_sequence where name='snapshots'")
    if [ -z "$SKEY" ] ; then
        >&2 echo "could not read back snapshot index for $CB"
        return 1
    fi

    # --numstat rows are "added<TAB>removed<TAB>path"; binary files show
    # "-" for both counts and are skipped, as is anything gitignore-ish.
    git diff "$BP..$CB" --numstat |
    while IFS=$'\t' read -r ADD REM F ; do
        if [ "$ADD" = "-" ] ; then
            continue
        fi
        case "$F" in
        *gitignore*)
            continue
            ;;
        esac

        # depth = number of '/' in the path, plus one
        slashes=${F//[^\/]/}
        DEPTH=$(( ${#slashes} + 1 ))

        sq3 "insert into dir_summary (snap_idx, run_key, dir_name, \
            dir_depth, loc_added, loc_removed) \
            values ($SKEY, $5, $(sql_str "$F"), $DEPTH, $ADD, $REM);"
    done
}

# Render a plot whose x axis is time (epoch seconds in the data file).
# $1: gnuplot plot clause, $2: output file, $3: title
#
# NOTE(review): the gnuplot commands in this here-document were lost from
# the only available copy of the script.  What follows is a reconstruction
# based on how the function is called (x is an epoch-seconds column, the
# data file starts with a header row) and must be checked against the
# intended look of the graphs.
issue_plot_time()
{
    cat >plot.tmp <<EOF
set terminal png noenhanced size 1024,480
set output "$2"
set title "$3"
set key autotitle columnhead
set xdata time
set timefmt "%s"
set format x "%Y-%m"
set xtics rotate
set grid
plot $1
EOF
    >&2 echo "creating $2"
    gnuplot plot.tmp
}

# Render a per-directory bar chart for one snapshot.
# $1: gnuplot plot clause, $2: largest y value, $3: output file,
# $4: title, $5: dimensions as WIDTH,HEIGHT, $6: label text (may hold \n)
#
# NOTE(review): as with issue_plot_time, the here-document body is a
# reconstruction and needs confirming.
issue_plot_file_dist()
{
    local yrange="set autoscale y"

    # only pin the top of the y axis when given a usable positive number
    case "$2" in
    ''|*[!0-9]*|0)
        ;;
    *)
        yrange="set yrange [*:$2]"
        ;;
    esac

    cat >plot.tmp <<EOF
set terminal png noenhanced size $5
set output "$3"
set title "$4"
set key autotitle columnhead
set style fill solid 0.6
set boxwidth 0.8
set xtics rotate by -90
set grid ytics
$yrange
set label "$6" at graph 0.98, 0.92 right
plot $1
EOF
    >&2 echo "creating $3"
    gnuplot plot.tmp
}

# Succeeds when .plot.cols lists at least one entry underneath directory
# $1, i.e. a line that starts with "$1/".
has_listed_subdir()
{
    P="$1/" awk 'index($0, ENVIRON["P"]) == 1 { found = 1; exit }
                 END { exit !found }' .plot.cols
}

# Print the sum of one dir_summary column for a directory in a snapshot;
# prints an empty line when nothing matched.
# $1: column (loc_added | loc_removed), $2: snap_idx, $3: directory,
# $4: 1 = only files directly in the directory (its sub-directories have
#     columns of their own), 0 = everything underneath it
dir_loc()
{
    local prefix="$3/"
    local cond=""
    local slashes

    if [ "$4" = 1 ] ; then
        # a file directly inside the dir has one more '/' than the dir
        slashes=${3//[^\/]/}
        cond="and dir_depth=$(( ${#slashes} + 2 ))"
    fi

    # match on "dir/" rather than "dir" so that arch/arm does not also
    # pick up arch/arm64
    sq3 "select sum($1) from dir_summary where snap_idx=$2 and \
        substr(dir_name, 1, ${#prefix})=$(sql_str "$prefix") $cond" |
        head -n1
}

# Print the help text on stderr.
usage()
{
    cat >&2 <<EOF
Usage: $0 <basis branch> --tags <tag regexp>
       $0 --plot <run> [ subdir ]
       $0 --plot <run a> < - | subdir > <run b>

$0 can be run in two modes, either create a 'run' in
the sqlite3 db cache, or create graphs about one or
comparing two runs already in the db cache

Creating a 'run' from one or more tags
--------------------------------------

A 'run' is created by studying one or more tags against
a 'basis branch' to isolate the patches on top of the
tag's basis point.  So if you have a kernel branch that
is tracking mainline, the various tags you have on that
kernel branch may be based on different mainline versions.
$0 can autodiscover for each tag where the basis point is
if you just give him the basis branch name, eg, 'mainline'.

 \$ $0 mainline --tags mybranch-tagname-regexp

Notice that the tag name to analyze on one 'run' is a regexp.
It's fine to have many tags analyzed in one 'run'.

When the run starts, the run number is reported and you
should make a note of it


Plotting graphs from one or two runs
------------------------------------

After the analysis for the tags you are interested in has
been captured into 'runs' in the sqlite3 db cache, you can
run the script to produce png and gif graphs showing or
comparing the data from different runs.

There's no requirement at all that the different runs have
anything in common in their history, basis or content,
giving a lot of flexibility in the comparisons.

To produce graphs about one run itself:

 \$ $0 --plot <run> [ subdir ]

If subdir is missing, the whole tree is analysed, if given
the analysis is restricted to the subdirectory given.

To produce graphs comparing two runs:

 \$ $0 --plot <run a> < - | subdir > <run b>

If there is no subdir restriction, - must be given.
<run b> must contain only one tag in this case.

Graphs will be produced with the union of information
from run a and run b, showing run a in blue and run b in red.
EOF
}


# create schema

# one of these for each comparison run

sq3 "create table if not exists runs (\
    run_key integer primary key autoincrement, \
    basis_hash varchar(50), \
    comp_hash varchar(50), \
    tags integer, \
    schema_ver integer \
);"

# one of these for each snapshot compared

sq3 "create table if not exists snapshots (\
    snap_idx integer primary key autoincrement, \
    run_idx integer, \
    ref_name varchar(50), \
    ref_date integer, \
    basis_name varchar(50), \
    basis_date integer, \
    files_changed integer, \
    loc_added integer, \
    loc_removed integer \
);"

# one of these for each dir changed in the snapshot
# we have run_key here as well since it simplifies finding all paths

sq3 "create table if not exists dir_summary (\
    key integer primary key autoincrement, \
    snap_idx integer, \
    run_key integer, \
    dir_name varchar(150), \
    dir_depth integer, \
    loc_added integer, \
    loc_removed integer \
);"


if [ -z "$1" ] ; then
    usage
    exit 1
fi

#
# plot mode
#

if [ "$1" = "--plot" ] ; then

    PLOT_RUN=$2
    FILTER=$3
    COMP_RUN=$4

    # run numbers are pasted into SQL, so insist on plain integers
    case "$PLOT_RUN" in
    ''|*[!0-9]*)
        die "--plot needs a numeric run number"
        ;;
    esac
    case "$COMP_RUN" in
    *[!0-9]*)
        die "comparison run must be a numeric run number"
        ;;
    esac

    if [ "$FILTER" = "-" ] ; then
        FILTER=
    fi
    FILTERLEN=${#FILTER}

    # how many leading path components make up one column: two for the
    # whole tree, and two more than the filter's own depth otherwise
    # (capped at six)
    LEVELS=1,2
    if [ -n "$FILTER" ] ; then
        slashes=${FILTER//[^\/]/}
        case $(( ${#slashes} + 1 )) in
        1) LEVELS=1,2,3 ;;
        2) LEVELS=1,2,3,4 ;;
        3) LEVELS=1,2,3,4,5 ;;
        *) LEVELS=1,2,3,4,5,6 ;;
        esac
    fi

    # tag used in output file names; empty when there is no filter
    _FILTER=${FILTER//\//_}
    if [ -n "$_FILTER" ] ; then
        _FILTER=$_FILTER-
    fi

    R=$(sq3 "select comp_hash,basis_hash from runs \
        where run_key=$PLOT_RUN" | tr '|' '-')
    if [ -z "$R" ] ; then
        die "no run $PLOT_RUN in $DB"
    fi

    #
    # get a list of snapshot idxs for both runs combined
    #

    if [ -n "$COMP_RUN" ] ; then
        RUN_IDX_COMP="(run_idx=$PLOT_RUN or run_idx=$COMP_RUN)"
        RUN_KEY_COMP="(run_key=$PLOT_RUN or run_key=$COMP_RUN)"
        CR=$(sq3 "select comp_hash,basis_hash from runs where \
            run_key=$COMP_RUN" | tr '|' '-')
        R="$R"-VS-$CR
    else
        RUN_IDX_COMP="run_idx=$PLOT_RUN"
        RUN_KEY_COMP="run_key=$PLOT_RUN"
    fi
    echo "$R"

    # our snapshots
    SN=$(sq3 "select snap_idx from snapshots where run_idx=$PLOT_RUN")

    # there's a comparison snapshot?
    SNC=
    if [ -n "$COMP_RUN" ] ; then
        SNC=$(sq3 "select snap_idx from snapshots \
            where run_idx=$COMP_RUN")
        COUNT=
        for i in $SNC ; do
            if [ -n "$COUNT" ] ; then
                die "Must be single comparison snapshot"
            fi
            COUNT=x
        done
    fi

    # start from empty work files so later stages never trip over a
    # missing file when a query returns nothing
    rm -f .plot.tmp .plot.dist
    : > .plot.cols1
    : > .plot.cols
    : > .plot.tmp1

    #
    # using both runs if two given,
    # create the column header row, and fill .plot.cols with the
    # list of dirs changed in this view of the diff
    #

    printf '%s' "basis_name basis_date ref_name ref_date files add del " \
        > .plot.tmp
    sq3 "select dir_name from dir_summary where \
        $RUN_KEY_COMP and \
        substr(dir_name, 1, $FILTERLEN)=$(sql_str "$FILTER") \
        order by loc_added,loc_removed asc" |
    cut -d'/' -f"$LEVELS" |
    while IFS= read -r i ; do
        case "$i" in
        Documentation/*)
            i=Documentation
            ;;
        *)
            # don't allow individual files
            if [ ! -d "$i" ] ; then
                i=$(dirname -- "$i")
            fi
            ;;
        esac
        # files at the top of the tree reduce to "."; they get no column
        if [ "$i" != "." ] ; then
            printf '%s\n' "$i" >> .plot.cols1
        fi
    done

    #
    # put the column titles in place and write out the
    # filtered list of dirs we will care about
    #
    sort .plot.cols1 | uniq | while IFS= read -r i ; do
        printf '%s ' "$i" >> .plot.tmp
        printf '%s\n' "$i" >> .plot.cols
    done
    echo >> .plot.tmp

    #
    # find out how many snapshots created by the run he's using
    # it doesn't include any comparison snapshot
    #
    N=0
    for i in $SN ; do
        N=$(( N + 1 ))
    done

    # how many files were changed
    CHANGEDFILES=$(( $(wc -l < .plot.cols) ))

    >&2 echo "Studying $N snapshots"
    >&2 echo "Total $CHANGEDFILES files changed"

    #
    # For each snapshot, go through the list of changed dirs and
    # find out how much changed there in that snapshot
    #
    T=1
    for i in $SN ; do
        L=$(sq3 "select basis_name, basis_date, ref_name, \
            ref_date, files_changed, loc_added, loc_removed \
            from snapshots where snap_idx=$i" | tr '|' ' ')
        # trailing blank keeps loc_removed apart from the first dir column
        printf '%s ' "$L" >> .plot.tmp

        read -r _ _ REFNAME _ <<< "$L"
        >&2 printf 'Snapshot %s/%s: %s \r' "$T" "$N" "$REFNAME"
        T=$(( T + 1 ))

        # fd 3 keeps the column list away from the stdin of the tools
        # run inside the loop
        while IFS= read -r j <&3 ; do
            # a dir whose sub-directories have their own columns only
            # counts the files directly inside it, eg arch, while
            # arch/arm is handled elsewhere
            if has_listed_subdir "$j" ; then
                DIRECT=1
            else
                DIRECT=0
            fi
            A=$(dir_loc loc_added "$i" "$j" "$DIRECT")
            printf '%s ' "${A:-0}" >> .plot.tmp
        done 3< .plot.cols
        echo >> .plot.tmp
    done

    >&2 echo

    #
    # for plots related to changes over time, we can do them now
    #

    issue_plot_time "'.plot.tmp' using \
        4:6 notitle with filledcurve y1=0 lc rgb \"#0000ff\"" \
        "growth-$R-LOC.png" "$R growth in LOC"

    issue_plot_time "'.plot.tmp' using \
        4:( (\$4-\$2)/(24 * 3600) ) notitle \
        with filledcurve y1=0 lc rgb \"#0000ff\"" \
        "growth-$R-basis-age.png" "$R growth basis age (days)"

    #
    # for each dir that has changes in any snapshot, for each
    # snapshot calculate its changes and create a unified plot data file
    #
    BIGGEST=0

    T=1
    while IFS= read -r j <&3 ; do

        >&2 printf 'File %s/%s \r' "$T" "$CHANGEDFILES"
        T=$(( T + 1 ))

        printf '%s ' "$j" >> .plot.tmp1

        if has_listed_subdir "$j" ; then
            DIRECT=1
        else
            DIRECT=0
        fi

        for i in $SN $SNC ; do
            A=$(dir_loc loc_added "$i" "$j" "$DIRECT")
            D=$(dir_loc loc_removed "$i" "$j" "$DIRECT")

            if [ -n "$A" ] && [ -n "$D" ] ; then
                V=$(( A - D ))
                if [ "$V" -gt "$BIGGEST" ] ; then
                    BIGGEST=$V
                fi
            else
                V=0
            fi
            printf '%s ' "$V" >> .plot.tmp1
        done

        echo >> .plot.tmp1
    done 3< .plot.cols

    >&2 echo

    printf '%s' "idx dir " > .plot.tmp
    for i in $SN $SNC ; do
        V=$(sq3 "select ref_name from snapshots where snap_idx=$i")
        printf '%s ' "$V" >> .plot.tmp
    done
    echo >> .plot.tmp

    # order the dirs by net growth in the newest snapshot of the run and
    # number them from 0 for use as the x position
    sort -k$(( N + 1 )) -nr .plot.tmp1 |
        awk '{ print NR - 1, $0 }' >> .plot.tmp

    WIDTH=$(( 16 * $(wc -l < .plot.cols) ))
    if [ "$WIDTH" -lt 640 ] ; then
        WIDTH=640
    fi

    # plot each snapshot in turn

    C=1
    while [ "$C" -le "$N" ] ; do
        COL=$(( C + 2 ))

        TOT=$(awk -v c="$COL" \
            'NR > 1 { t += $c } END { print t + 0 }' .plot.tmp)

        PL="'.plot.tmp' using 1:$COL:xtic(2) \
            w boxes lc rgb \"#0000ff\" "

        Q=$(head -n1 .plot.tmp | cut -d' ' -f"$COL")

        _TOT="Total LOC $Q: $TOT"

        echo "$_TOT"

        if [ -n "$SNC" ] ; then
            CCOL=$(( N + 3 ))
            PL="'.plot.tmp' using 1:$CCOL:xtic(2) \
                w boxes lc rgb \"#ff0000\",$PL"
            TOTC=$(awk -v c="$CCOL" \
                'NR > 1 { t += $c } END { print t + 0 }' .plot.tmp)
            Q=$(head -n1 .plot.tmp | cut -d' ' -f"$CCOL")

            # the literal \n is turned into a line break by gnuplot
            _TOT="Total LOC $Q: $TOTC\n$_TOT"
        fi

        issue_plot_file_dist \
            "$PL" "$BIGGEST" \
            "growth-$R-dist-$_FILTER$(printf %04d "$C").png" \
            "$R patch distribution (LOC) $3" \
            "$WIDTH,480" "$_TOT"
        C=$(( C + 1 ))
    done

    if [ "$N" -gt 0 ] ; then
        >&2 echo "Converting gif"
        # the frame names carry the filter tag, so the glob must too
        convert -delay 50 -loop 0 \
            "growth-$R-dist-$_FILTER"????.png \
            "growth-$R-dist${_FILTER:+-${_FILTER%-}}.gif"
    fi

    exit 0
fi

#
# Tagged rebase tree mode
#

if [ "$2" = "--tags" ] ; then
    if [ -z "$3" ] ; then
        die "Need tag regexp filter with --tags"
    fi

    sq3 "insert into runs ( \
        run_key, basis_hash, comp_hash, schema_ver) \
        values (NULL, $(sql_str "$1"), $(sql_str "$3"), $SCHEMA_VER);"
    RUNKEY=$(sq3 "select seq from sqlite_sequence where name='runs'")

    >&2 echo "tags mode -- run $RUNKEY"
    index=0
    # fd 3 keeps the tag list away from the stdin of git and sqlite3
    while IFS= read -r i <&3 ; do
        >&2 echo "$i"
        make_stat "$1" "$i" "" "$index" "$RUNKEY"
        index=$(( index + 1 ))
    done 3< <(git tag | grep -e "$3")

    exit 0
fi

exit 0

# ---> untested
#
# Everything below is unreachable because of the exit above; it is kept
# as the author left it.
# NOTE(review): make_stat is called here without an index or run_key, so
# its insert statements would be invalid if this were enabled as-is.

#
# History tree mode
#

if [ -n "$2" ] ; then
    COMP=$2
else
    COMP=$(git rev-parse --abbrev-ref HEAD)
fi

basis_point "$1" "$COMP"

git log "$BP.." --oneline |
    cut -d' ' -f1 |
    tac > .patches.tmp

TODO=$(( $(wc -l < .patches.tmp) ))
C=1

while IFS= read -r i <&3 ; do
    >&2 echo "Patch $C/$TODO"
    make_stat "$1" "$i" "$BP"
    C=$(( C + 1 ))
done 3< .patches.tmp

exit 0