perfdatadir2csv.sh


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170

#!/bin/bash

set -eu -o pipefail

# Defaults.  Each of these may be overridden by the command-line options
# handled in parse_args below.
buildid_dir="global"
perf_dirs=()
event="cycles"
format="sample,overhead"
sort_format_field=""
# Row limits: "dso" and "symbol" cap the rows taken from each perf report,
# "total" caps the rows printed per benchmark.
declare -A num_entries
num_entries[total]="100"
num_entries[dso]=${num_entries[total]}
num_entries[symbol]=${num_entries[total]}
perf_bin=""
results_dir=""
add_time=false
verbose=0

# Abort with an error unless option "$1" is followed by a value.
# Call as: require_value "$@"
require_value ()
{
    if test $# -lt 2; then
	echo "ERROR: Option $1 requires an argument" >&2
	exit 1
    fi
}

# Parse command-line options into the global settings above.
# Arguments: the script's command line ("$@").
# Exits with status 1 (message on stderr, so it cannot end up in the CSV
# written to stdout) on an unknown option, a missing option value, or a
# --dir argument that is not an accessible directory.
parse_args ()
{
    local abs_dir

    while test $# -gt 0; do
	case "$1" in
	    --buildid-dir) require_value "$@"; buildid_dir="$2"; shift ;;
	    --dir|-d)
		require_value "$@"
		# Store the absolute path.  "&&" matters here: with ";" a
		# failed cd would silently record the current directory.
		if ! abs_dir=$(cd -- "$2" >/dev/null 2>&1 && pwd); then
		    echo "ERROR: Directory does not exist: $2" >&2
		    exit 1
		fi
		# "+=" avoids expanding a possibly empty array, which is an
		# "unbound variable" error under "set -u" in bash < 4.4.
		perf_dirs+=("$abs_dir")
		shift
		;;
	    --event|-e) require_value "$@"; event="$2"; shift ;;
	    --format|-f) require_value "$@"; format="$2"; shift ;;
	    --num|-n) require_value "$@"; num_entries[total]="$2"; shift ;;
	    --num-dsos) require_value "$@"; num_entries[dso]="$2"; shift ;;
	    --num-symbols) require_value "$@"; num_entries[symbol]="$2"; shift ;;
	    --perf-bin) require_value "$@"; perf_bin="$2"; shift ;;
	    --results-dir) require_value "$@"; results_dir="$2"; shift ;;
	    --sort-field) require_value "$@"; sort_format_field="$2"; shift ;;
	    --time|-t) add_time=true ;;
	    --verbose=*) verbose="${1#*=}" ;;
	    --verbose) verbose=$((verbose+1)) ;;
	    *) echo "ERROR: Unknown option $1" >&2; exit 1 ;;
	esac
	shift
    done
}

parse_args "$@"

# Print one CSV row "<benchmark>,time,<format>" for a benchmark, where the
# first "sample" in <format> is replaced by the benchmark's time (truncated
# to an integer) and the first "overhead" by "100%".
#
# The time is the 3rd comma-separated field of the line starting with the
# benchmark name and containing "SelectedIteration" in
# CINT2006.<id>.ref.csv* / CFP2006.<id>.ref.csv* next to the perf directory;
# <id> is the 2nd dot-separated component of the perf directory's name.
#
# Arguments:
#   $1 - perf data directory (e.g. .../perf.<id>.data)
#   $2 - perf data file name (<benchmark>.data)
#   $3 - comma-separated output format
# Returns: 0 on success; 1 (with a message on stderr) when no result CSV
#          exists or no usable time is found for the benchmark.
spectime ()
{
    local perf_dir="$1"
    local data="$2"
    local format="$3"
    local id bmk results_path run_time csv
    local num_re='^([0-9]+\.?[0-9]*|\.[0-9]+)$'
    local -a csvs=()

    id=$(basename "$perf_dir" | cut -d. -f 2)
    bmk=$(basename "$data" .data)
    results_path=$(dirname "$perf_dir")

    # Only one of the two suites may have been run.  Collect the CSVs that
    # exist instead of letting "cat" fail on the missing one, which under
    # "set -e -o pipefail" would silently abort the whole script.
    for csv in "$results_path/CINT2006.$id.ref.csv"* \
	       "$results_path/CFP2006.$id.ref.csv"*; do
	if [ -f "$csv" ]; then
	    csvs+=("$csv")
	fi
    done
    if [ ${#csvs[@]} -eq 0 ]; then
	echo "ERROR: No CINT2006/CFP2006 result CSV for run $id in $results_path" >&2
	return 1
    fi

    # grep exits non-zero when nothing matches; that case is reported below.
    run_time=$(grep -h "^$bmk.*SelectedIteration" "${csvs[@]}" | cut -d, -f3) || true
    if ! [[ $run_time =~ $num_re ]]; then
	echo "ERROR: No usable time for benchmark $bmk (run $id) in $results_path" >&2
	return 1
    fi

    # Truncate to an integer: drop the fractional part and leading zeros.
    run_time=${run_time%%.*}
    run_time=$((10#${run_time:-0}))

    echo "$bmk,time,$format" | sed -e "s/sample/$run_time/" -e "s/overhead/100%/"
}

# Main flow: validate the settings, then print to stdout a CSV with one
# "benchmark,<dso-or-symbol>,<format fields>" row per perf-report entry of
# every [1-8]*.data file found in each perf directory.
# Diagnostics go to stderr so that they never end up in the CSV.

if [ "$verbose" -ge 1 ]; then
    set -x
fi

if [ x"$perf_bin" = x"" ]; then
    {
	echo "ERROR: Specify perf binary that was used to generate perf.data files"
	echo "       via --perf-bin option."
	echo "       E.g., --perf-bin /usr/lib/linux-tools/<hw_tag>/perf"
    } >&2
    exit 1
fi

# Always define failed_csvs: it is expanded below even when --results-dir
# was not given, which is an "unbound variable" error under "set -u" in
# bash < 4.4 if the array was never set.
failed_csvs=()
if [ x"$results_dir" != x"" ]; then
    # With --results-dir the perf directories are discovered here; this
    # replaces any directories given via --dir.
    mapfile -t failed_csvs < <(find "$results_dir" -name "failed.*.csv*" | sort)
    mapfile -t perf_dirs < <(find "$results_dir" -name "perf.*.data" | sort)
fi

# The ${arr[@]+"${arr[@]}"} form expands to nothing for an empty array
# without tripping "set -u" on older bash versions.
for perf_dir in ${perf_dirs[@]+"${perf_dirs[@]}"}; do
    if ! [ -d "$perf_dir" ]; then
	echo "ERROR: Directory does not exist: $perf_dir" >&2
	exit 1
    fi
done

# Command used to order the rows of each benchmark: a pass-through unless
# --sort-field names one of the format fields.  Field 1 of a row is the
# dso/symbol name, so the format fields start at sort key 2.
sort_cmd=(cat)
if [ x"$sort_format_field" != x"" ]; then
    key="2"
    found_sort_field=false
    # Intentionally unquoted: split the comma-separated format into words.
    for i in ${format//,/ }; do
	if [ x"$i" = x"$sort_format_field" ]; then
	    sort_cmd=(sort -t, "-k$key" -g -r)
	    found_sort_field=true
	    break
	fi
	key=$((key+1))
    done
    if ! $found_sort_field; then
	echo "ERROR: Did not find sort_format_field $sort_format_field in format $format" >&2
	exit 1
    fi
fi

# Scratch space for intermediate report output.  It is removed on every
# exit path, including aborts caused by "set -e".
tmpdir=$(mktemp -d)
trap 'rm -rf -- "$tmpdir"' EXIT

# Print out CSV header
echo "benchmark,symbol,$format"

# Print out entries for failed-to-build and failed-to-run benchmarks.
# csvs2table.py records data only for the 1st occurrence of every symbol,
# so these will override any data which may show up in the profile.
# They are emitted only when the format is exactly "sample".
if [ x"$format" = x"sample" ]; then
    for failed_csv in ${failed_csvs[@]+"${failed_csvs[@]}"}; do
	cut -d"," -f 1-3 "$failed_csv"
    done
fi

for perf_dir in ${perf_dirs[@]+"${perf_dirs[@]}"}; do
    # Where perf looks up build-ids: perf's default ("global"/"none"),
    # the .debug directory inside the perf directory ("local"), or an
    # explicit path.
    buildid_opt=()
    case "$buildid_dir" in
	"global"|"none")
	    ;;
	"local")
	    buildid_opt=(--buildid-dir "$perf_dir/.debug")
	    ;;
	*)
	    buildid_opt=(--buildid-dir "$buildid_dir")
	    ;;
    esac

    # Glob instead of parsing "ls"; the expansion is sorted by the shell.
    for data_path in "$perf_dir"/[1-8]*.data; do
	# An unmatched glob is left as the literal pattern: nothing to do.
	if ! [ -e "$data_path" ]; then
	    continue
	fi
	data=${data_path##*/}
	bmk_name=${data%.data}

	if $add_time; then
	    spectime "$perf_dir" "$data" "$format"
	fi

	merged_out="$tmpdir/merged"
	for report_field in dso symbol; do
	    report_out="$tmpdir/report.$report_field"

	    perf_workaround_sort=""
	    # Always squeeze the blanks around the field separators.
	    sed_args=(-e "s/ *, */,/g")
	    if [ x"$report_field" = x"symbol" ]; then
		# FIXME: In the perf-report command below we add "dso" to
		# the sort fields, and then remove this last item with sed.
		# This is to workaround what appears to be a perf-report bug.
		# Without "dso" among the sort fields, we sometimes get
		# samples for symbols scattered between several entries, e.g.,
		# 444.namd:
		# [.] _ZN20ComputeNonbondedUtil26calc_pair_energy_fullelectEP9nonbonded,819,5004
		# [.] _ZN20ComputeNonbondedUtil26calc_pair_energy_fullelectEP9nonbonded,500,5004
		# [.] _ZN20ComputeNonbondedUtil26calc_pair_energy_fullelectEP9nonbonded,258,5004
		# which then causes csvs2table.py to disregard all but the last
		# entry.
		# Using "dso" seems to force merging of the symbol sample data,
		# which is what we require.
		# This is seen with perf version 4.18.0-13-generic.
		perf_workaround_sort=",dso"
		sed_args+=(-e 's/,[^,]\+$//')
	    fi

	    # "size" in the format stands for dso_size or symbol_size.
	    format1=${format//size/${report_field}_size}

	    # The awk filter keeps only the data rows of the requested event:
	    # it waits for the "# Samples: ... of event '<event>'" line, then
	    # for the "#" column-header line containing ", Samples", prints
	    # the non-empty lines after it, and ignores everything from the
	    # next "#" line on.
	    # The report goes through a file rather than straight into "head"
	    # so that "head" exiting early cannot fail the pipeline under
	    # "set -o pipefail".
	    # NOTE(review): $perf_bin is left unquoted as in the original, so
	    # it is word-split; confirm whether that is relied upon.
	    # shellcheck disable=SC2086
	    $perf_bin ${buildid_opt[@]+"${buildid_opt[@]}"} report --no-demangle -f \
		      -i "$perf_dir/$data" --stdio \
		      -g none --no-children -F "$report_field,$format1" \
		      -s "sample$perf_workaround_sort" \
		      -t, 2>/dev/null \
		| awk "
BEGIN { found_samples=0; found_command=0 }
{ if (ignore_and_exit) { next } }
/^# Samples: .* of event '${event}['/]/ { if (found_samples) { ignore_and_exit=1; next }; found_samples=1; next }
/^# .*, *Samples/ { if (found_samples) { found_command=1 }; next }
/^#/ { if (found_command) { ignore_and_exit=1; next } }
/^$/ { next }
{ if (found_command) { print \$0 } }
" | sed "${sed_args[@]}" > "$report_out"

	    head -n "${num_entries[$report_field]}" "$report_out"
	done | "${sort_cmd[@]}" > "$merged_out"
	head -n "${num_entries[total]}" "$merged_out" | sed -e "s/^/$bmk_name,/"
    done
done