From 34b4a5d605b71ef14899d2a8b55ab156cb8798f0 Mon Sep 17 00:00:00 2001
From: Andy Green <andy.green@linaro.org>
Date: Mon, 17 Aug 2015 21:18:06 +0800
Subject: initial commit

---
 growth.sh | 658 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 658 insertions(+)
 create mode 100755 growth.sh

diff --git a/growth.sh b/growth.sh
new file mode 100755
index 0000000..ac00f2e
--- /dev/null
+++ b/growth.sh
@@ -0,0 +1,658 @@
+#!/bin/bash
+#
+# growth.sh
+#
+# Copyright (C) 2015 Linaro, Ltd
+# Andy Green <andy.green@linaro.org>
+# Licensed under GPL2.1
+#
+# Please run the script with no args to get comprehensive help
+#
+# Note on sqlite3 usage
+#
+# The sqlite3 db generated here is just caching analysis the script
+# generated itself.  You can delete it and the script will recreate an
+# empty one automatically, but you will have to regenerate the runs
+# that were stored in it.
+#
+# Having the data cached there is helpful both in making complex queries
+# that are difficult to reproduce using cut, sed, sort etc and in allowing
+# quick development of new graphs and queries without the cost of generating
+# the data each time.
+
+
+DB=growth.sq3
+SCHEMA_VER=1
+
+BP=
+DIRCOL=0
+rm -f .cols.tmp
+touch .cols.tmp
+rm -f .first-phase
+
+function sq3()		# $1: SQL to run against $DB; exits 1 on failure
+{			# NB: inside `...` that exit only ends the subshell
+	if ! sqlite3 "$DB" "$1" ; then
+		# report on stderr: callers capture our stdout with `...`
+		>&2 echo "sqlite error"
+		>&2 printf '%s\n' "$1"
+		exit 1
+	fi
+}
+
+# number of preset columns before dir ones
+OFFSET_COLS=8
+
+# return col num of dirname in DIRCOL
+# $1: dirname
+
+# Find where the comparison tree left the basis branch.
+# $1: basis branch, $2: starting tree.  Result is left in global BP.
+function basis_point()
+{
+	echo basis_point $1 $2
+	# Context lines of the unified diff (leading space) are hashes present
+	# in both first-parent lists; keep the first one, minus that space.
+	BP=$(diff -u <(git rev-list --first-parent $2) \
+		     <(git rev-list --first-parent $1) |
+		sed -n -e 's/^ //p' | head -1)
+	echo result $BP
+}
+
+# Record one snapshot (a ref diffed against its basis point) in the db.
+# $1: basis branch, $2: comparison ref, $3: basis point if known (when
+# empty it is found with basis_point), $4: index in sequence (not used
+# here), $5: run_key we are attached to
+# Adds one snapshots row, plus one dir_summary row per changed path that
+# is neither binary nor a gitignore file.  Leaves the basis point in BP.
+function make_stat()
+{
+	local CB BASIS COMP DS DS_BASIS
+	local F FILES ADD REM SKEY DEPTH
+
+	CB="$2"
+	if [ -z "$3" ] ; then
+		basis_point $1 $CB
+	else
+		BP=$3
+	fi
+
+	BASIS=`git describe $BP`
+	COMP=`git describe $CB`
+	DS=`git log -n 1 $CB --format=format:%ct`
+	DS_BASIS=`git show $BP --format=format:%ct | head -n1`
+
+	echo "git diff $BP..$CB --shortstat"
+
+	# --shortstat leaves out the insertions and/or deletions clause when
+	# that count is zero and prints nothing for an empty diff, so pick
+	# each number by its label, not its field position, defaulting to 0
+	F="`git diff $BP..$CB --shortstat`"
+	FILES=$(echo "$F" | sed -n 's/^ *\([0-9][0-9]*\) file.*/\1/p')
+	ADD=$(echo "$F" | sed -n 's/.* \([0-9][0-9]*\) insertion.*/\1/p')
+	REM=$(echo "$F" | sed -n 's/.* \([0-9][0-9]*\) deletion.*/\1/p')
+
+	sq3 "insert into snapshots (run_idx, ref_name, ref_date, \
+			basis_name, basis_date, files_changed, \
+			loc_added, loc_removed) \
+		values ($5, \"$COMP\", $DS, \"$BASIS\", $DS_BASIS, \
+			${FILES:-0}, ${ADD:-0}, ${REM:-0});"
+	SKEY=`sq3 "select seq from sqlite_sequence where \
+			name=\"snapshots\""`
+
+	# numstat lines are "added<TAB>removed<TAB>path"; splitting on the
+	# tabs alone keeps a path that contains spaces in one piece
+	git diff $BP..$CB --numstat | while IFS=$'\t' read -r ADD REM F ; do
+		# binary files show "-" as their counts: nothing to record
+		[ "$ADD" = "-" ] && continue
+		case "$F" in
+		*gitignore*) continue ;;
+		esac
+
+		# depth is the number of '/' in the path plus one (wc -c also
+		# counts the newline echo adds)
+		DEPTH=`echo "$F" | sed "s|[^/]||g" | wc -c`
+		sq3 "insert into dir_summary (snap_idx, run_key, dir_name, \
+				dir_depth, loc_added, loc_removed) \
+			values ($SKEY, $5, \"$F\", $DEPTH, $ADD, $REM);"
+	done
+
+}
+
+# Write a gnuplot script to plot.tmp that draws a graph over time, run it.
+# $1: gnuplot plot expression, $2: output png file, $3: graph title
+function issue_plot_time()
+{
+	cat >plot.tmp <<EOF
+set terminal pngcairo notransparent enhanced font "arial,10" size 660, 320 
+set output '$2'
+set title "$3" 
+set yrange [ 0 : ] noreverse nowriteback
+set lmargin  9
+set rmargin  2
+set autoscale xfixmin
+set xdata time
+set termoption dash
+set timefmt "%s"
+set key autotitle columnhead
+plot $1
+EOF
+	# announce the file being written, as issue_plot_file_dist does
+	>&2 echo "creating $2"
+	gnuplot plot.tmp
+}
+
+# Write a gnuplot histogram script to plot.tmp and run it.  $1: plot
+# expression, $2: y max, $3: output file, $4: title, $5: size, $6: label
+function issue_plot_file_dist()
+{
+	local plot_expr="$1" y_max="$2" out_png="$3" title="$4" dims="$5" note="$6"
+	cat >plot.tmp <<EOF
+set terminal pngcairo notransparent enhanced font "arial,10" size $dims
+set output '$out_png'
+set title "$title" 
+set yrange [  : $y_max ] noreverse nowriteback
+set lmargin  9
+set rmargin  2
+set autoscale xfixmin
+#set xdata time
+set termoption dash
+set timefmt "%s"
+set key autotitle columnhead
+set style fill transparent solid 0.5 noborder
+set xtics rotate
+set style data histogram
+set label "$note" at graph 0.99,0.8 right
+plot $plot_expr
+EOF
+	>&2 echo "creating $out_png"
+	gnuplot plot.tmp
+}
+
+
+# create schema
+
+# one of these for each comparison run
+
+sq3 "create table if not exists runs (\
+	run_key integer primary key autoincrement, \
+	basis_hash varchar(50), \
+	comp_hash varchar(50), \
+	tags integer, \
+	schema_ver integer \
+);"
+
+# one of these for each snapshot compared
+
+sq3 "create table if not exists snapshots (\
+	snap_idx integer primary key autoincrement, \
+	run_idx integer, \
+	ref_name varchar(50), \
+	ref_date integer, \
+	basis_name varchar(50), \
+	basis_date integer, \
+	files_changed integer, \
+	loc_added integer, \
+	loc_removed integer \
+);"
+
+# one of these for each dir changed in the snapshot
+# we have run_key here as well since it simplifies finding all paths
+
+sq3 "create table if not exists dir_summary (\
+	key integer primary key autoincrement, \
+	snap_idx integer, \
+	run_key integer, \
+	dir_name varchar(150), \
+	dir_depth integer, \
+	loc_added integer, \
+	loc_removed integer \
+);"
+
+# no arguments at all: print the full help on stderr and exit 1
+if [ -z "$1" ] ; then
+	>&2 echo "Usage: $0 <basis branch> --tags <tag regexp>"
+	>&2 echo "       $0 --plot <run #> [ subdir ]"
+	>&2 echo "       $0 --plot <run # a> < - | subdir > <run # b>"
+	>&2 echo ""
+	>&2 echo "$0 can be run in two modes, either create a 'run' in"
+	>&2 echo "the sqlite3 db cache, or create graphs about one or"
+	>&2 echo "comparing two runs already in the db cache"
+	>&2 echo ""
+	>&2 echo "Creating a 'run' from one or more tags"
+	>&2 echo "--------------------------------------"
+	>&2 echo ""
+	>&2 echo "A 'run' is created by studying one or more tags against"
+	>&2 echo "a 'basis branch' to isolate the patches on top of the"
+	>&2 echo "tag's basis point.  So if you have a kernel branch that"
+	>&2 echo "is tracking mainline, the various tags you have on that"
+	>&2 echo "kernel branch may be based on different mainline versions."
+	>&2 echo "$0 can autodiscover for each tag where the basis point is"
+	>&2 echo "if you just give him the basis branch name, eg, 'mainline'."
+	>&2 echo ""
+	>&2 echo " \$ $0 mainline --tags mybranch-tagname-regexp"
+	>&2 echo ""
+	>&2 echo "Notice that the tag name to analyze on one 'run' is a regexp."
+	>&2 echo "It's fine to have many tags analyzed in one 'run'."
+	>&2 echo ""
+	>&2 echo "When the run starts, the run number is reported and you"
+	>&2 echo "should make a note of it"
+	>&2 echo ""
+	>&2 echo ""
+	>&2 echo "Plotting graphs from one or two runs"
+	>&2 echo "------------------------------------"
+	>&2 echo ""
+	>&2 echo "After the analysis for the tags you are interested in has"
+	>&2 echo "been captured into 'runs' in the sqlite3 db cache, you can"
+	>&2 echo "run the script to produce png and gif graphs showing or"
+	>&2 echo "comparing the data from different runs."
+	>&2 echo ""
+	>&2 echo "There's no requirement at all that the different runs have"
+	>&2 echo "anything in common in their history, basis or content,"
+	>&2 echo "giving a lot of flexibility in the comparisons."
+	>&2 echo ""
+	>&2 echo "To produce graphs about one run itself:"
+	>&2 echo ""
+	>&2 echo " \$ $0 --plot <run #> [ subdir ]"
+	>&2 echo ""
+	>&2 echo "If subdir is missing, the whole tree is analysed, if given"
+	>&2 echo "the analysis is restricted to the subdirectory given."
+	>&2 echo ""
+	>&2 echo "To produce graphs comparing two runs:"
+	>&2 echo ""
+	>&2 echo " \$ $0 --plot <run # a> < - | subdir > <run # b>"
+	>&2 echo ""
+	>&2 echo "If there is no subdir restriction, - must be given."
+	>&2 echo "<run # b> must contain only one tag in this case."
+	>&2 echo ""
+	>&2 echo "Graphs will be produced with the union of information"
+	>&2 echo "from run a and run b, showing run a in blue and run b in red."
+
+	exit 1
+fi
+
+LEVELS=1,2
+FILTER=$3
+if [ "$FILTER" = "-" ] ; then
+	FILTER=
+fi
+
+FILTERLEN=${#FILTER}
+F_DEPTH=`echo "$FILTER" | sed "s|[^/]||g" | wc -c`
+
+if [ ! -z "$FILTER" ] ; then
+	if [ $F_DEPTH == 1 ] ; then
+		LEVELS=1,2,3
+	else if [ $F_DEPTH == 2 ] ; then
+		LEVELS=1,2,3,4
+		else if [ $F_DEPTH == 3 ] ; then
+			LEVELS=1,2,3,4,5
+			else
+				LEVELS=1,2,3,4,5,6
+			fi
+		fi
+	fi
+fi
+
+#
+# plot mode
+#
+
+if [ "$1" = "--plot" ] ; then
+
+	PLOT_RUN=$2
+	COMP_RUN=$4
+	# R names the run(s), <comp_hash>-<basis_hash>; used in titles/filenames
+	R=`sq3 "select comp_hash,basis_hash from runs where run_key=$PLOT_RUN"|\
+		tr '|' '-'`
+
+	#
+	# build the where-clause fragments that select one or both runs; when
+	# comparing, the second run's name is appended to R after "-VS-"
+
+	if [ ! -z "$COMP_RUN" ] ; then
+		RUN_IDX_COMP="(run_idx=$PLOT_RUN or run_idx=$COMP_RUN)"
+		RUN_KEY_COMP="(run_key=$PLOT_RUN or run_key=$COMP_RUN)"
+		CR=`sq3 "select comp_hash,basis_hash from runs where \
+			run_key=$COMP_RUN" | tr '|' '-'`
+		R="$R"-VS-$CR
+	else
+		RUN_IDX_COMP="run_idx=$PLOT_RUN"
+		RUN_KEY_COMP="run_key=$PLOT_RUN"
+	fi
+echo $R
+	# our snapshots
+	SN=`sq3 "select snap_idx from snapshots where run_idx=$PLOT_RUN"`
+
+	# there's a comparison snapshot?  the comparison run may hold only one
+	SNC=
+	if [ ! -z "$COMP_RUN" ] ; then
+		SNC=`sq3 "select snap_idx from snapshots where run_idx=$COMP_RUN"`
+		COUNT=
+		for i in $SNC ; do
+			if [ ! -z "$COUNT" ] ; then
+				>&2 echo "Must be single comparison snapshot"
+				exit 1
+			fi
+			COUNT=x
+		done
+	fi
+
+	rm -f .plot.tmp
+	rm -f .plot.cols .plot.cols1
+	rm -f .plot.dist
+
+	#
+	# using both runs if two given,
+	# create the column header row, and fill .plot.cols with the
+	# list of files / dirs changed in this view of the diff
+	# (paths are cut to the $LEVELS leading components first)
+
+	echo -n "basis_name basis_date ref_name ref_date files add del " \
+								> .plot.tmp
+	sq3 "select dir_name from dir_summary where \
+		$RUN_KEY_COMP and \
+		substr(dir_name, 1, $FILTERLEN)=\"$FILTER\" \
+		order by loc_added,loc_removed asc" | \
+		cut -d'/' -f$LEVELS | while read i ; do
+		if [ ! -z "`echo "$i" | grep ^Documentation/`" ] ; then
+			echo "Documentation" >> .plot.cols1
+		else
+			if [ "$i" != "." ] ; then
+				# don't allow individual files (-d checks the cwd)
+				if [ ! -d "$i" ] ; then
+					dirname $i >> .plot.cols1
+				else
+					echo $i >> .plot.cols1
+				fi
+			fi
+		fi
+	done
+
+	#
+	# put the column titles in place and write out the
+	# filtered list of files/dirs we will care about
+	#
+	cat .plot.cols1 | sort | uniq | while read i ; do
+		echo -n "$i " >> .plot.tmp
+		echo $i >> .plot.cols
+	done
+	echo >> .plot.tmp
+
+	#
+	# find out how many snapshots created by the run he's using
+	# it doesn't include any comparison snapshot
+	#
+	N=0
+	for i in $SN ; do
+		N=$(( $N + 1 ))
+	done
+
+	# how many files were changed
+	CHANGEDFILES=`wc -l .plot.cols | cut -d' ' -f1`
+
+	>&2 echo "Studying $N snapshots"
+	>&2 echo "Total $CHANGEDFILES files changed"
+
+	#
+	# For each snapshot, go through the list of changed files/dirs and
+	# find out how much changed there in that snapshot
+	# (one row per snapshot in .plot.tmp, one value per .plot.cols entry)
+	T=1
+	for i in $SN ; do
+		L=`sq3 "select basis_name, basis_date, ref_name, \
+			ref_date, files_changed, loc_added, loc_removed\
+			from snapshots where snap_idx=$i" | tr '|' ' '`
+		echo -n $L >> .plot.tmp
+
+		>&2 echo -n -e "Snapshot $T/$N: `echo $L | cut -d' ' -f3`      \r"
+		T=$(( $T + 1 ))
+
+		cat .plot.cols | while read j ; do
+			JLEN=${#j}
+
+			# does any column name contain "$j/", ie, a subdir of this one?
+			if [ -z "`cat .plot.cols | grep "$j/"`" ] ; then
+
+				# everything inside this dir
+				A=`sq3 "select sum(loc_added) \
+					from dir_summary where \
+					 snap_idx=$i and \
+					 substr(dir_name, 1, $JLEN)=\"$j\"\
+					" | head -n1`
+
+			else
+				# it's truncated, so only files in this dir
+				# eg arch, but arch/arm is handled elsewhere
+
+				DEP=$(( `echo "$j" | sed "s|[^/]||g" | wc -c` + 1 ))
+				A=`sq3 "select sum(loc_added) \
+					from dir_summary where\
+				 	snap_idx=$i and \
+					substr(dir_name, 1, $JLEN)=\"$j\" and \
+					dir_depth=$DEP" | head -n1`
+			fi
+
+			if [ -z "$A" ] ; then
+				echo -n "0 " >> .plot.tmp
+			else
+				echo -n "$A " >> .plot.tmp
+			fi
+		done
+		echo >> .plot.tmp
+	done
+
+	>&2 echo
+
+	#
+	# for plots related to changes over time, we can do them now
+	# (.plot.tmp columns: 2 basis_date, 4 ref_date, 6 loc added)
+
+	issue_plot_time "'.plot.tmp' using \
+		4:6 notitle  with filledcurve y1=0 lc rgb \"#0000ff\"" \
+		"growth-$R-LOC.png" "$R growth in LOC"
+
+	issue_plot_time "'.plot.tmp' using \
+		4:( (\$4-\$2)/(24 * 3600) ) notitle \
+		with filledcurve y1=0 lc rgb \"#0000ff\"" \
+		   "growth-$R-basis-age.png" "$R growth basis age (days)"
+
+	rm -f .plot.tmp1
+
+	#
+	# for each file / dir that has changes in any snapshot, for each
+	# snapshot calculate its changes and create a unified plot data file
+	# (.plot.tmp1: one row per column name, one value per snapshot)
+	echo 0 > .biggest
+	# BIGGEST lives in a file: the loop below is a pipeline subshell
+	T=1
+	cat .plot.cols | while read j ; do
+
+		>&2 echo -n -e "File $T/$CHANGEDFILES    \r"
+		T=$(( $T + 1 ))
+
+		echo -n "$j " >> .plot.tmp1
+
+		BIGGEST=`cat .biggest`
+
+		JLEN=${#j}
+
+		for i in $SN $SNC ; do
+			if [ -z "`cat .plot.cols | grep "$j/"`" ] ; then
+				# everything inside the dir
+			A=`sq3 "select sum(loc_added) \
+				from dir_summary where\
+                         	snap_idx=$i and \
+				substr(dir_name, 1, $JLEN)=\"$j\" \
+				"|head -n1`
+
+			D=`sq3 "select sum(loc_removed) \
+				from dir_summary where\
+				snap_idx=$i and \
+				substr(dir_name, 1, $JLEN)=\"$j\" \
+				"|head -n1`
+			else
+				# it's truncated, so only files in this dir
+				# eg arch, but arch/arm is handled elsewhere
+				DEP=$(( `echo "$j" | sed "s|[^/]||g" | wc -c` + 1 ))
+
+				A=`sq3 "select sum(loc_added) \
+					from dir_summary where \
+				 	snap_idx=$i and \
+					substr(dir_name, 1, $JLEN)=\"$j\"\
+					and dir_depth=$DEP" | head -n1`
+
+				D=`sq3 "select sum(loc_removed) \
+					from dir_summary where \
+				 	snap_idx=$i and \
+					substr(dir_name, 1, $JLEN)=\"$j\" \
+					and dir_depth=$DEP" | head -n1`
+			fi
+			# net growth = added - removed; the largest sets the y range
+			if [ ! -z "$A" -a ! -z "$D" ] ; then
+				V=$(( $A - $D ))
+
+				echo -n "$V " >> .plot.tmp1
+
+				if [ $V -gt $BIGGEST ] ; then
+					BIGGEST=$V
+					echo $V > .biggest
+				fi
+			else
+				echo -n "0 " >> .plot.tmp1
+			fi
+		done
+
+		echo >> .plot.tmp1
+	done
+
+	>&2 echo
+
+	echo -n "idx dir " > .plot.tmp
+	for i in $SN $SNC ; do
+		V="`sq3 "select ref_name \
+			from snapshots where snap_idx=$i"`"
+
+		echo -n "$V " >> .plot.tmp
+	done
+	echo >> .plot.tmp
+	# number the rows, largest first on field N+1 (our last snapshot)
+	C=0
+	sort -k$(( $N + 1 )) -nr .plot.tmp1 | while read i ; do
+		echo "$C $i" >> .plot.tmp
+		C=$(( $C + 1 ))
+	done
+	# 16 pixels of width per column, with a floor of 640
+	WIDTH=$(( 16 * `cat .plot.cols | wc -l` ))
+	if [ $WIDTH -lt 640 ] ; then
+		WIDTH=640
+	fi
+
+	# plot each snapshot in turn
+
+	C=1
+	while [ $C -le $N ]; do
+
+		BIGGEST=`cat .biggest`
+
+		TOT=`cat .plot.tmp |tail -n+2 | \
+			cut -d' ' -f$(( $C + 2 )) |paste -sd+ | bc`
+		_FILTER=`echo "$FILTER" | sed "s|/|_|g"`
+		if [ ! -z "$_FILTER" ] ; then
+			_FILTER=$_FILTER-
+		fi
+
+		PL="'.plot.tmp' using 1:$(( $C + 2 )):xtic(2) \
+                        w boxes lc rgb \"#0000ff\" "
+
+		Q="`cat .plot.tmp | head -n1 | \
+				cut -d' ' -f$(( $C + 2 ))`"
+
+		_TOT="Total LOC $Q: $TOT"
+
+		echo $_TOT
+
+		if [ ! -z "$SNC" ] ; then
+			PL="'.plot.tmp' using 1:$(( $N + 3 )):xtic(2) \
+                        w boxes lc rgb \"#ff0000\",$PL"
+			TOTC="`cat .plot.tmp |tail -n+2 | \
+				cut -d' ' -f$(( $N + 3 )) |paste -sd+ | bc`"
+			Q="`cat .plot.tmp | head -n1 | \
+				cut -d' ' -f$(( $N + 3 ))`" \
+
+			_TOT="Total LOC $Q: $TOTC\n$_TOT"
+		fi
+
+		issue_plot_file_dist \
+			"$PL" $BIGGEST \
+			"growth-$R-dist-$_FILTER`printf %04d $C`.png" \
+			"$R patch distribution (LOC) $3" \
+			$WIDTH,480 "$_TOT"
+		C=$(( $C + 1 ))
+	done
+
+	>&2 echo "Converting gif"
+	convert -delay 50 -loop 0 growth-$R-dist-????.png growth-$R-dist.gif
+
+	exit 0
+fi
+
+#
+# Tagged rebase tree mode: record a new run, then one snapshot for every
+# tag whose name matches the regexp in $3, each against basis branch $1
+#
+if [ "$2" = "--tags" ] ; then
+	[ -n "$3" ] || {
+		>&2 echo "Need tag regexp filter with --tags"
+		exit 1
+	}
+
+	sq3 "insert into runs ( \
+			run_key, basis_hash, comp_hash, schema_ver) \
+			values (NULL, \"$1\", \"$3\", \"$SCHEMA_VER\"); \
+		"
+	# autoincrement bookkeeping gives us the key of the row just added
+	RUNKEY=$(sq3 "select seq from sqlite_sequence where \
+			name=\"runs\"")
+
+	>&2 echo "tags mode -- run $RUNKEY"
+	index=0
+	for tag in $(git tag | grep "$3") ; do
+		>&2 echo $tag
+		make_stat $1 $tag "" $index $RUNKEY
+		index=$(( index + 1 ))
+	done
+	exit 0
+fi
+
+exit 0
+# everything below is unreachable: the exit above always runs first
+# ---> untested
+
+#
+# History tree mode: one snapshot per commit on top of the basis point
+#
+# $2: comparison ref, defaulting to the currently checked-out branch
+if [ ! -z "$2" ] ; then
+	COMP=$2
+else
+	COMP=`git rev-parse --abbrev-ref HEAD`
+fi
+
+basis_point $1 $COMP
+# abbreviated hashes of the commits after $BP (up to HEAD), oldest first
+git log $BP.. --oneline | \
+	cut -d' ' -f1 | \
+	tac > .patches.tmp
+
+TODO=`wc -l .patches.tmp | cut -d' ' -f 1`
+C=1
+# NOTE(review): make_stat gets no $4/$5 here, so its run_key is empty
+cat .patches.tmp | while read i ; do
+	>&2 echo "Patch $C/$TODO"
+	make_stat $1 $i $BP
+	C=$(( $C + 1 ))
+done
+
+exit 0
+
-- 
cgit v1.2.3