aboutsummaryrefslogtreecommitdiff
path: root/prepare-board.sh
blob: 9274514715330c164778103dc134a99afbb58adb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
#!/bin/bash
#
# This program will configure a board for benchmarking, with
# all steps reversible.
#
# Design ideas:
#   All configure_* functions have <start|stop> as their
#   first argument. If "start_*" is used, they should append
#   sufficient information to a file in ${RESTORE_LOC} so
#   that the original state can be restored later. If "stop_*"
#   is used, AND the corresponding file in ${RESTORE_LOC} is
#   present, that file should be used to restore state, and
#   then deleted.
#
#   Where it is possible to automagically detect the need for
#   a configuration step, configuration should be added to
#   configure_common (eg do_tegra_start)
#
#   To add support for a new board, create:
#   . prepare-board/freq-<board_type>
#   . [optional] prepare-board/sysctl-<board_type>.conf
#   . [optional] configure_<board_type> function in this file
#   and add a pattern for the new hw_tag to hw_tag2board_type.
#
#   This file is expected to have no errors detected by
#   the shellcheck program.
#

############################################################
# initialise script environment
set -o errexit -o nounset -o pipefail
# Directory containing this script, with symlinks resolved.
TOP="$(dirname "$(readlink -f "$0")")"

############################################################
# configuration
# Per-board configuration files live next to the script.
CONFS="${TOP}/prepare-board"
# Files describing how to undo a start_* action are kept here.
RESTORE_LOC="/run/prepare-board"
# FIXME: put this in a board-specific conf file
SERVICE_LIST=("ntp")

############################################################
# usage/help
# Print the command line syntax on stdout.
# Every option accepted by the option parser is listed here, including
# --image, which selects the docker image that is pulled and test-run
# while docker is being checked.
help()
{
    cat <<EOF
Syntax: $0 <--action ACTION> [--hw_tag <tx1_64|tk1_32|...>] [--image IMAGE] [--verbose]

    --action start_board/stop_board:
                    prepare/finalize bare board for benchmarking
    --action start_docker/stop_docker:
                    prepare/finalize docker container for benchmarking
    --hw_tag:       board hardware type (tk1_32, tx1_64, ...);
                    required for start_* actions, rejected for stop_*
    --image:        docker image to pull and test-run while checking
                    docker during start_board
    --verbose:      output progress as settings are applied
EOF
}


############################################################
# helper functions

# Print all arguments as one "ERROR: ..." line on stderr, then
# terminate the script with exit status 1.
error()
{
    printf 'ERROR: %s\n' "$*" >&2
    exit 1
}

# Print all arguments as one "WARNING: ..." line on stderr; the
# script keeps running.
warning()
{
    printf 'WARNING: %s\n' "$*" >&2
}

# Print all arguments as one "NOTE: ..." line on stderr, but only when
# VERBOSE is set to a non-empty value; otherwise do nothing.
verbose()
{
    [ -n "${VERBOSE:-}" ] || return 0
    printf 'NOTE: %s\n' "$*" >&2
}

############################################################
# board type ID helper functions
# Map a hardware tag (e.g. tk1_32, tx1_64) to its board type by prefix.
# Arguments: $1 - hardware tag
# Outputs:   the board type ("tk1" or "tx1") on stdout
# Calls error (which exits) for a tag with no known prefix.
hw_tag2board_type()
{
    local tag=$1
    local known

    for known in tk1 tx1; do
        if [[ "$tag" == "$known"* ]]; then
            echo "$known"
            return 0
        fi
    done
    error "Unknown hw_tag $tag"
}

############################################################
# board configuration helper functions

# Locate the linux-tools directory (cpupower and perf) for this run.
# Arguments: $1 - operation
# Globals:   LINUX_TOOLS (written), HW_TAG (read for start_docker)
# For start_board and start_docker LINUX_TOOLS is set and must be an
# existing directory, otherwise error is called.  Any other operation
# leaves LINUX_TOOLS untouched.
configure_linux_tools()
{
  local op=$1
  local missing_msg

  case "$op" in
      start_board)
          # Both TK1s and TX1 are configured to use
          # linux-tools-4.4.0-59-generic.  Cpupower appears to work fine
          # with their kernels (3.10 for TK1s and 4.4 for TX1s).
          LINUX_TOOLS="/usr/lib/linux-tools/$(uname -r)"
          missing_msg="Bare board does not have linux-tools configured"
          ;;
      start_docker)
          # Our main hw_tags tk1_32, tx1_64 and tx1_32 are all aliased
          # to linux-tools-4.18.0-13-generic from ubuntu bionic.
          # Perf from 4.18 is recent enough to support symbol_size sorting
          # field, which we use to track code-size, and appears to work
          # just fine with TK1s 3.10 and TX1s 4.4 kernels.
          LINUX_TOOLS="/usr/lib/linux-tools/$HW_TAG"
          missing_msg="Docker image does not support hw_tag $HW_TAG"
          ;;
      *)
          return 0
          ;;
  esac

  [ -d "$LINUX_TOOLS" ] || error "$missing_msg"
}

# set_var writes a value to a file (eg in /proc) and remembers the
# previous content so that restore_vars can put it back later.
# Arguments: $1 - restore file to append "<file>=<old value>" to
#            $2 - file to write
#            $3 - value to write
# Nothing is written or recorded when the file already holds the value.
# Calls error if the file does not read back as the requested value.
set_var()
{
    local restore_file=$1
    local file=$2
    local value=$3

    local previous
    previous=$(cat "$file")

    if [ "$previous" = "$value" ]; then
        verbose "Not setting $file, already set to $value."
        return
    fi

    verbose "Setting $file to $value (was $previous)"
    # Record the old value before changing anything.
    echo "$file=$previous" >> "$restore_file"
    echo "$value" > "$file"
    [[ "$(cat "$file")" == "$value" ]] || error "cannot set $file to $value"
}

# restore_vars restores the original values for configuration
# which was set by set_var.
# Arguments: $1 - restore file holding "<file>=<value>" lines
# Entries are replayed newest-first, so a file that was changed several
# times ends up with the value it had before the first change.
# A failed restore only produces a warning.  The restore file is
# removed afterwards.  A missing restore file is not an error.
restore_vars()
{
    local restore_file=$1
    local entry
    local file
    local value

    if [ ! -f "$restore_file" ]; then
        verbose "Restore file $restore_file not present"
        return 0
    fi
    tac "$restore_file" | while read -r entry; do
        [ -n "$entry" ] || continue
        # Split at the FIRST '=' only: the saved value may itself
        # contain '=' characters.
        file=${entry%%=*}
        value=${entry#*=}
        verbose "Setting $file to $value"
        (echo "$value" > "$file") || true
        if [ "$(cat "$file")" != "$value" ]; then
            warning "cannot reset $file to $value"
        fi
    done
    rm -f "$restore_file"
}

############################################################
# board configuration functions

########################################
# configure_sysctls and helpers
#   sets sysctls from ${CONFS}/sysctl-<board_type>.conf or ${CONFS}/sysctl.conf

# do_sysctl parses a sysctl.conf file, and sets the sysctls
# in /proc accordingly
# Arguments: $1 - sysctl.conf style file with "name = value" lines
#            $2 - restore file handed to set_var
# Blank lines and lines starting with '#' or ';' are skipped.  A line
# without '=' is reported with a warning and skipped.  Spaces are
# removed from both name and value, and dots in the name become
# slashes to form the path below /proc/sys.
do_sysctl()
{
    local sysctl_file="$1"
    local sysctl_revert="$2"
    local entry
    local key
    local value

    # "|| [ -n ... ]" keeps a final line that lacks a trailing newline.
    while read -r entry || [ -n "$entry" ]; do
        case "$entry" in
            ''|'#'*|';'*) continue ;;
        esac
        if [[ "$entry" != *=* ]]; then
            warning "Ignoring malformed sysctl entry: $entry"
            continue
        fi
        # Split at the first '=' so the value may contain '='.
        key=${entry%%=*}
        key=${key// /}
        key=${key//.//}
        value=${entry#*=}
        value=${value// /}
        set_var "$sysctl_revert" "/proc/sys/$key" "$value"
    done < "$sysctl_file"
}

# Apply or revert the benchmarking sysctls.
# Arguments: $1 - operation
# start_board: apply ${CONFS}/sysctl-${BOARD_TYPE}.conf, falling back
#              to ${CONFS}/sysctl.conf when no board-specific file exists.
# stop_board:  restore the values recorded in ${RESTORE_LOC}/sysctls.conf.
# Other operations do nothing.
configure_sysctls()
{
    local op=$1
    local conf

    case "$op" in
        start_board)
            conf="$CONFS/sysctl-${BOARD_TYPE}.conf"
            [ -f "$conf" ] || conf="$CONFS/sysctl.conf"
            do_sysctl "$conf" "${RESTORE_LOC}/sysctls.conf"
            ;;
        stop_board)
            restore_vars "${RESTORE_LOC}/sysctls.conf"
            ;;
    esac
}


########################################
# configure_services and helpers
#   stops all services in ${SERVICE_LIST[*]} for benchmarking

# do_service starts or stops a named service
# assumes that the system is using systemd
# Arguments: $1 - operation
#            $2 - file listing the services stopped by start_board
#            $3 - service name
# start_board: stop the service if it is active and record its name.
# stop_board:  start the service again, but only if its name is
#              recorded in the file.
# Other operations do nothing.
do_service()
{
    local op=$1
    local restart_file=$2
    local service=$3

    if [ "$op" = "start_board" ]; then
        if [ "$(systemctl is-active "$service" || true)" = "active" ]; then
            verbose "Stopping system service $service"
            systemctl stop "$service"
            echo "$service" >> "$restart_file"
        else
            verbose "Skipping system service $service, which is already stopped"
        fi
    elif [ "$op" = "stop_board" ]; then
        # -F: compare the name literally; a '.' in a unit name must not
        # act as a regex wildcard.  -x: match whole lines only.
        if [ -f "$restart_file" ] &&
           grep -q -x -F -e "$service" -- "$restart_file"; then
            verbose "Restarting system service $service"
            systemctl start "$service"
        else
            verbose "Not restarting service $service, because start_board didn't stop it"
        fi
    fi
}

# Run do_service for every entry of SERVICE_LIST.
# Arguments: $1 - operation, passed through to do_service
# The list of stopped services is kept in ${RESTORE_LOC}/services and is
# deleted once stop_board has processed every service.
configure_services()
{
    local op=$1
    local restart_file="${RESTORE_LOC}/services"
    local svc

    for svc in "${SERVICE_LIST[@]}"; do
        do_service "$op" "$restart_file" "$svc"
    done

    case "$op" in
        stop_board) rm -f "$restart_file" ;;
    esac
}

########################################
# configure_perf_hack and helpers

# Manage the background "perf workaround" process.
# Arguments: $1 - operation
# Does nothing unless $CONFS/perf_hack-$BOARD_TYPE exists.  Otherwise:
#  - for every operation, a workaround process recorded in
#    ${RESTORE_LOC}/perf_hack.pid is killed and the pid file removed;
#  - for start_docker, a new "perf record" pinned to CPU 0 is started
#    in the background around a very long sleep, using the options
#    from the conf file, and its PID is recorded in the pid file.
configure_perf_hack()
{
    local op=$1
    local conf_file="$CONFS/perf_hack-$BOARD_TYPE"
    local pidfile="${RESTORE_LOC}/perf_hack.pid"
    local pid

    if [ ! -f "$conf_file" ]; then
        return 0
    fi

    if [ -f "$pidfile" ]; then
        pid=$(cat "$pidfile")
        # The process may already be gone; that is fine.
        kill "$pid" || true
        verbose "Stopped perf workaround on PID $pid"
        rm -f "$pidfile"
    fi

    if [ "$op" = "start_docker" ]; then
        # We do want word splitting of the conf file CONTENT, as it
        # contains cmd line options for perf.  The paths themselves are
        # quoted so that directories with spaces keep working.
        # shellcheck disable=SC2046
        taskset -c 0 "$LINUX_TOOLS/perf" record -q -N $(cat "$conf_file") -o /dev/null -- sleep 2000d 1>&- 2>&- &
        pid=$!
        # Detach the job from this shell's job table.
        disown "$pid"
        # Any previous pid file was removed above.
        echo "$pid" > "$pidfile"
        verbose "Starting perf workaround as PID $pid"
    fi
}

########################################
# configure_cpu_freq and helpers
#   sets CPU frequency scaling range and governor
# Resolve a requested frequency to a concrete value.
# Arguments: $1 - cpufreq directory of one CPU
#            $2 - "min", "max", or a literal frequency
# Outputs:   for "min"/"max" the content of cpuinfo_min_freq /
#            cpuinfo_max_freq in that directory; anything else is
#            echoed back unchanged.
calc_freq()
{
    local cpufreq_path=$1
    local req_freq=$2

    if [[ "$req_freq" == "min" || "$req_freq" == "max" ]]; then
        cat "$cpufreq_path/cpuinfo_${req_freq}_freq"
    else
        echo "$req_freq"
    fi
}

# do_cpufreq_start parses the ${CONFS}/freq-* files
# format of the frequency configuration is one line:
# <cpulist> <governor> <minfreq> <maxfreq>
# frequencies are in kHz or "min" or "max"
# Arguments: $1 - restore file handed to set_var
# Only the first line that is not a '#' comment is used.  <cpulist> is a
# comma separated list of CPU numbers.  A missing
# ${CONFS}/freq-${BOARD_TYPE} file only produces a warning.
do_cpufreq_start()
{
    local restore_file="$1"
    local file="$CONFS/freq-$BOARD_TYPE"
    local freqconf
    local cpus gov min max
    local cpu cpufreq_path
    local maxfreq minfreq

    if [ ! -f "$file" ]; then
        warning "Not configuring cpufreq because $file does not exist"
        return 0
    fi

    verbose "Selected cpufreq file $file."

    freqconf=$(grep -v '^ *#' "$file" | head -n 1)
    # Split the line into its four fields; "_" swallows anything extra.
    read -r cpus gov min max _ <<< "$freqconf"

    while [ -n "$cpus" ]; do
        # Peel the first CPU off the comma separated list.
        cpu=${cpus%%,*}
        if [[ "$cpus" == *,* ]]; then
            cpus=${cpus#*,}
        else
            cpus=""
        fi

        cpufreq_path=/sys/devices/system/cpu/cpu${cpu}/cpufreq

        maxfreq=$(calc_freq "$cpufreq_path" "$max")
        minfreq=$(calc_freq "$cpufreq_path" "$min")

        # By default use ondemand governor with HW min and max frequencies.
        # This avoids an inconsistent state when we try to set $minfreq
        # below the current maximum frequency.
        "$LINUX_TOOLS/cpupower" -c "$cpu" frequency-set --governor ondemand \
            --min "$(calc_freq "$cpufreq_path" min)" \
            --max "$(calc_freq "$cpufreq_path" max)"

        set_var "$restore_file" "$cpufreq_path/scaling_governor" "$gov"
        set_var "$restore_file" "$cpufreq_path/scaling_min_freq" "$minfreq"
        set_var "$restore_file" "$cpufreq_path/scaling_max_freq" "$maxfreq"
    done
}

# If the tegra_cpuquiet feature is present, disable it and then set
# every /sys/devices/system/cpu/cpu*/online entry to 1.
# Arguments: $1 - restore file handed to set_var
# Does nothing on systems without the tegra_cpuquiet control file.
do_tegra_start()
{
    local file=$1
    local quiet_ctl=/sys/devices/system/cpu/cpuquiet/tegra_cpuquiet/enable
    local online_ctl

    [ -f "$quiet_ctl" ] || return 0

    set_var "$file" "$quiet_ctl" 0
    for online_ctl in /sys/devices/system/cpu/cpu*/online; do
        set_var "$file" "$online_ctl" 1
    done
}

# Configure or restore CPU frequency scaling.
# Arguments: $1 - operation
# start_board: disable tegra cpuquiet (if present), then apply the
#              board's frequency configuration.
# stop_board:  restore everything recorded in ${RESTORE_LOC}/cpufreq.
# Other operations do nothing.
configure_cpufreq()
{
    # Take the operation from our own argument instead of relying on a
    # variable named "op" happening to be visible from the caller.
    local op=$1
    local file=${RESTORE_LOC}/cpufreq
    if [ "$op" = "start_board" ]; then
        # we have to disable cpuquiet before setting frequencies, but
        # do the restoration in the opposite order. So it's easiest
        # to share the same restore file.
        do_tegra_start "$file"
        do_cpufreq_start "$file"
    elif [ "$op" = "stop_board" ]; then
        restore_vars "$file"
    fi
}


############################################################
# board-specific configuration functions
#

########################################
# configure_board_type_file()
#   saves/restores board type
# Save or reload the board type in ${RESTORE_LOC}/board_type.
# Arguments: $1 - operation
# Globals:   BOARD_TYPE (read for start_*, written for stop_*)
# start_*: write BOARD_TYPE to the file.
# stop_*:  read BOARD_TYPE back from the file; when the file is
#          missing, warn and end the whole script with status 0.
configure_board_type_file()
{
    local op=$1
    local type_file="$RESTORE_LOC/board_type"

    if [[ "$op" == start_* ]]; then
        echo "$BOARD_TYPE" > "$type_file"
    elif [[ "$op" == stop_* ]]; then
        if [ -f "$type_file" ]; then
            BOARD_TYPE=$(cat "$type_file")
        else
            warning "Board not previously configured with $0"
            exit 0
        fi
    fi
}

########################################
# configure_ntp()
#   update time on the board
# Run "ntpd -gq" once for start_board, with one retry.
# Arguments: $1 - operation; anything but start_board is a no-op
configure_ntp()
{
    local op=$1

    [ "$op" = "start_board" ] || return 0

    # Retry on occasional failures from ntpd.
    if ntpd -gq; then
        return 0
    fi

    # Occasionally systemctl fails to properly kill ntp and
    # we get stuck ntpd process that doesn't respond to SIGTERM.
    # Kill it with SIGKILL.
    pkill -9 ntpd || true
    sleep 5
    ntpd -gq
}

# Check that docker can start a container.
# Lists containers through docker-wrapper and, when DOCKER_IMAGE is
# non-empty, pulls that image ("maybepull") and runs /usr/bin/true in it.
test_docker()
{
    # We run this script under as root, so don't have tcwg-benchmark's $PATH
    # in our environment.  Use docker-wrapper via absolute path.
    local -a docker_cmd=(/home/tcwg-benchmark/bin/docker-wrapper)

    "${docker_cmd[@]}" ps
    if [ -n "$DOCKER_IMAGE" ]; then
        "${docker_cmd[@]}" maybepull "$DOCKER_IMAGE"
        "${docker_cmd[@]}" run --rm --entrypoint=/usr/bin/true "$DOCKER_IMAGE"
    fi
    verbose "Docker seems to be OK"
}

########################################
# configure_docker()
#   Make sure docker is functional on the board
# Make sure docker works and park running containers while benchmarking.
# Arguments: $1 - operation
# start_board: check docker with test_docker; on failure restart the
#              docker service (on tk1 boards after wiping
#              /var/lib/docker) and check again, calling error if it
#              still fails.  Then stop every running container and
#              record its ID in ${RESTORE_LOC}/docker.
# stop_board:  start the exited containers whose IDs were recorded,
#              then remove the record.
# Other operations do nothing.
configure_docker()
{
    local op=$1
    local restore_file=${RESTORE_LOC}/docker
    local cnt

    if [ "$op" = "start_board" ]; then
        # Retry on occasional failures from docker.
        # NOTE(review): test_docker presumably runs in the background
        # so that a failure inside it cannot end this shell; "wait"
        # then reports its status.
        test_docker &
        if ! wait "$!"; then
            case "$BOARD_TYPE" in
                tk1)
                    # With the TK1's old kernel the only way to run docker
                    # is to use devicemapper storage driver with loopback
                    # backend, which is unfit for production usage.
                    # Every few months the loopback file gets corrupted and
                    # docker can't start.
                    # To solve this we go nuclear on docker.
                    /usr/sbin/service docker stop || true
                    rm -rf /var/lib/docker/
                    ;;
            esac
            /usr/sbin/service docker restart
            test_docker &
            if ! wait "$!"; then
                error "Cannot make docker work on the system"
            fi
        fi

        for cnt in $(docker ps -q); do
            docker stop "$cnt"
            echo "$cnt" >> "$restore_file"
        done
    elif [ "$op" = "stop_board" ]; then
        # Nothing was recorded if no container had to be stopped; skip
        # the lookup instead of letting grep complain about the file.
        if [ -f "$restore_file" ]; then
            for cnt in $(docker ps -q -f status=exited); do
                # Whole-line, literal comparison of the container ID.
                if grep -q -x -F -e "$cnt" -- "$restore_file"; then
                    docker start "$cnt"
                fi
            done
        fi
        rm -f "$restore_file"
    fi
}


############################################################
# perform the configuration
# Run every configuration step for the requested operation.
# Arguments: $1 - operation
# For start_* the restore directory is created first; for stop_* the
# saved board type file and the restore directory are removed last.
configure_common()
{
    # Keep this variable named "op": with bash's dynamic scoping the
    # functions called below can see it.
    local op=$1
    local step

    if [[ "$op" == start_* ]]; then
        mkdir -p "${RESTORE_LOC}"
    fi

    # The order matters; steps run exactly as listed.
    for step in board_type_file linux_tools sysctls services \
                ntp cpufreq perf_hack docker; do
        "configure_${step}" "$op"
    done

    if [[ "$op" == stop_* ]]; then
        rm -f "$RESTORE_LOC/board_type"
        rmdir "${RESTORE_LOC}"
    fi
}

# Image checked by test_docker; empty means "no image check".
DOCKER_IMAGE=""

############################################################
# parse command line options
# --action, --hw_tag and --image take a value; a missing value is
# reported explicitly rather than surfacing as an "unbound variable"
# failure under "set -u".
while [[ $# -gt 0 ]]; do
   OPT=$1
   shift
   case "$OPT" in
     --action|--hw_tag|--image)
         if [[ $# -eq 0 ]]; then
             echo "Option $OPT requires an argument" >&2
             exit 1
         fi
         case "$OPT" in
           --action) ACTION=$1 ;;
           --hw_tag) HW_TAG=$1 ;;
           --image) DOCKER_IMAGE="$1" ;;
         esac
         shift
         ;;
     --verbose) VERBOSE=1 ;;
     --help|-h) help; exit 0 ;;
     *) echo "Unrecognised option: $OPT" >&2; exit 1 ;;
   esac
done

############################################################
# validate command line options


# --action is mandatory.
if [ "${ACTION:+set}" != "set" ]; then
    error "Must specify --action <start_board|stop_board|start_docker|stop_docker>"
fi

# --hw_tag is required when starting and forbidden when stopping (the
# board type is then read back from the restore location).
case "${ACTION}:${HW_TAG:+x}" in
    start_*:"x") ;;
    start_*:*)
        error "Must specify --hw_tag"
        ;;
    stop_*:"") ;;
    stop_*:*)
        error "Must not specify --hw_tag when stopping."
        ;;
    *)
        # Neither start_* nor stop_*: refuse instead of silently
        # doing nothing.
        error "Unknown action $ACTION"
        ;;
esac

############################################################
# process command line options to get config
# Derive the board type when a hardware tag was given on the command line.
if [ -n "${HW_TAG:-}" ]; then
    BOARD_TYPE=$(hw_tag2board_type "$HW_TAG")
fi

############################################################
# do the work!
configure_common "$ACTION"