diff options
author | Ravineet Singh <ravineet.singh@linaro.org> | 2016-11-29 17:25:35 +0100 |
---|---|---|
committer | Ravineet Singh <ravineet.singh@linaro.org> | 2016-12-21 13:23:12 +0100 |
commit | 9ae8ff88142f5c63ed51457d57c2e466e26fe3f6 (patch) | |
tree | 6970180f518389ea3e8f6e4bc8b41e20fcc6aefd | |
parent | 5232dcb4b025ad394e865b76327aa32d2303427e (diff) |
- verify that the number of local timer (tick) interrupts and rescheduling
interrupts on each isolated core does not exceed the runtime in seconds + 1.
- removed the timeout for kernel traces, since it made no sense.
Signed-off-by: Ravineet Singh <ravineet.singh@linaro.org>
-rwxr-xr-x | isolate-cpu.sh | 62 | ||||
-rwxr-xr-x | isolate-task.sh | 179 |
2 files changed, 189 insertions, 52 deletions
diff --git a/isolate-cpu.sh b/isolate-cpu.sh index 1de02ae..55a1467 100755 --- a/isolate-cpu.sh +++ b/isolate-cpu.sh @@ -37,13 +37,11 @@ dlog() { } warn() { - printf "Warning:\n " >&2 - echo "$*" >&2 + printf "Warning: $*\n" >&2 } die() { - printf "Error:\n " >&2 - echo "$*" >&2 + printf "Error: $*\n" >&2 shield_reset exit 1 } @@ -70,7 +68,7 @@ get_cpu_array() { } ## -# Check kernel config and kernel cmfline for rcu callbacs and no_hz +# Check kernel config and kernel cmdline for rcu callbacs and no_hz # *Note* isolcpu= kernel cmdline option isolates CPUs from SMP balancing # If needed, this can be done via cpusets/user/cpuset.sched_load_balance ## @@ -90,38 +88,40 @@ check_kernel_config() { local all_except_0="1-$(($(getconf _NPROCESSORS_ONLN) - 1))" if [ $config ]; then nohz_full=$(zgrep "CONFIG_NO_HZ_FULL_ALL=y" $config 2>/dev/null) && nohz_full=$all_except_0 - rcu_nocbs=$(zgrep "CONFIG_RCU_NOCB_CPU_ALL=y" $config 2>/dev/null) && rcu_nocbs=$all_except_0 - + #rcu_nocbs=$(zgrep "CONFIG_RCU_NOCB_CPU_ALL=y" $config 2>/dev/null) && rcu_nocbs=$all_except_0 else warn "Kernel config not found, only checking /proc/cmdline for isolation features." fi #rcu_nocbs and nohz_full kernel config superseeds cmdline option - if ! [ "$rcu_nocbs" ]; then - eval $(cat /proc/cmdline | grep -o 'rcu_nocbs=[^ ]*') - fi + #if ! [ "$rcu_nocbs" ]; then + # eval $(cat /proc/cmdline | grep -o 'rcu_nocbs=[^ ]*') + #fi if ! [ "$nohz_full" ]; then eval $(cat /proc/cmdline | grep -o 'nohz_full=[^ ]*') fi - if [ -z "$nohz_full" ]; then - warn "No CPU is isolated from kernel ticks, CONFIG_NO_HZ_FULL_ALL=y not set in kernel, nor nohz_full= set in kernel cmdline." + eval $(cat /proc/cmdline | grep -o 'isolcpus=[^ ]*') + if [ -z "$isolcpus" ]; then + warn "No CPU is isolated from kernel/user threads, isolcpus= is not set in kernel cmdline." 
else - dlog "nohz_full:$nohz_full" - gbl_isolated_cpus=$nohz_full + gbl_isolated_cpus=$isolcpus export gbl_isolated_cpus fi - [ -z "$rcu_nocbs" ] && - warn "No CPU is set to block RCU threads, CONFIG_RCU_NOCB_CPU_ALL=y not set in kernel, nor rcu_nocbs= set in kernel cmdline." + if [ -z "$nohz_full" ]; then + warn "No CPU is isolated from kernel ticks, CONFIG_NO_HZ_FULL_ALL=y not set in kernel, nor nohz_full= set in kernel cmdline." + fi - [ "$nohz_full" = "$rcu_nocbs" ] || warn "Configuration mismatch: nohz_full=$nohz_full," \ - "rcu_nocbs=$rcu_nocbs. CPU isolation will not be efficient" + #[ -z "$rcu_nocbs" ] && warn "No CPU is set to block RCU threads, CONFIG_RCU_NOCB_CPU_ALL=y not set in kernel, nor rcu_nocbs= set in kernel cmdline." - if [ $rcu_nocbs ]; then - dlog "rcu_nocbs=$rcu_nocbs" - fi + #Move rcu threads to core 0 + for i in `pgrep rcu` ; do taskset -pc 0 $i >/dev/null; done + + dlog "isolcpus:$isolcpus" + dlog "nohz_full:$nohz_full" + #dlog "rcu_nocbs:$rcu_nocbs" return 0 } @@ -143,6 +143,7 @@ cpus_valid() { return 0 } + check_prequesties() { dlog "Checking prequesties; user is root, kernel has cpuset support, and commads; cset, zgrep, getconf are available" [ $UID -eq 0 ] || die "You need to be root!" @@ -154,6 +155,7 @@ check_prequesties() { shield_reset() { cset shield -r >/dev/null 2>&1 + sleep 0.1 } shield_list() { @@ -170,9 +172,10 @@ shield_cpus() { local cpus="$1" dlog "shielding CPU:s $cpus" + #Reset and create new shield - cset shield -r >/dev/null 2>&1 - out=$(cset shield -c $cpus kthread=on 2>&1) || die "cset failed; $out" + shield_reset + out=$(cset shield -c $cpus -k on 2>&1) || die "cset failed; $out" # Delay the annoying vmstat timer far away sysctl vm.stat_interval=120 >/dev/null @@ -184,8 +187,8 @@ shield_cpus() { #Fixme, check that /sys/bus is mounted? echo 1 > /sys/bus/workqueue/devices/writeback/cpumask - #Fixme disable load balanser? How do we schedule our threads? 
- #echo 0 > /cpusets/user/cpuset.sched_load_balance + # Disable load balanser. + echo 0 > /cpusets/user/cpuset.sched_load_balance #Fixme, for now just send all irqs to core 0 for affinity in /proc/irq/*/smp_affinity; do @@ -218,6 +221,15 @@ isolate_cpus() { dlog "Isolating CPUs $cpus" shield_cpus $cpus + + # Verfiy cores empty + for c in $(get_cpu_array $cpus); do + running=$(ps ax -o pid,psr,comm | awk -v cpu="$c" '{if($2==cpu){print $3}}') + if [ "$running" != "" ]; then + warn "Core $c not empty; running tasks:\n$running\n" + fi + done + return 0 } diff --git a/isolate-task.sh b/isolate-task.sh index 6aff0c5..5673ab8 100755 --- a/isolate-task.sh +++ b/isolate-task.sh @@ -1,18 +1,25 @@ #!/bin/bash # # This script isolates a task on desired CPUs. -# If desired, kernel calls are traced to see kernel interference on isolated CPUs +# Optionally; +# - kernel calls can be traced (-t) +# - application can be verified to not be interrupted by kernel ticks +# as long as the application itself does not use Linux timer... 
of course +# +# ToDo +# - Remove kill output print_usage() { - echo "$0 [-h] [-t <time>] [-c <cpu list>] <application arg1, arg2, ...>" + echo "$0 [-c <cpu list>] [-d] [-h] [-t] [-v] <application arg1, arg2, ...>" echo echo " Isolate CPU(s) from other tasks, kernel threads and IRQs" echo " and run an application on isolated CPUs" echo " Args:" - echo " -h Print this message" - echo " -t Trace kernel calls for 'time' seconds on chosen CPUs" echo " -c List of CPUs to be isolated" echo " -d Show debug printouts" + echo " -h Print this message" + echo " -t Trace kernel calls on chosen CPUs" + echo " -v Verify task isolation" echo "" echo "All CPU's, except CPU 0, are isolated unless '-c' specified" echo " Examples:" @@ -31,16 +38,23 @@ dlog() { } warn() { - printf "Warning:\n " >&2 - echo "$*" >&2 + printf "Warning: $*\n" >&2 } die() { - printf "Error:\n " >&2 - echo "$*" >&2 + printf "Error: $*\n" >&2 exit 1 } +trap cleanup INT EXIT + +cleanup(){ + pids=$(pgrep $MY_STRESS 2>/dev/null) + [ "$pids" != "" ] && kill -9 $pids + rm -f $MY_STRESS_PATH + rm -f $tmp_file +} + get_cpu_array() { [ $1 ] || die "$FUNCNAME internal error!" @@ -58,9 +72,64 @@ get_cpu_array() { fi done + IFS=" " + for c in $cpus; do + local re='^[0-9]+$' + if ! 
[[ $c =~ $re ]] ; then + die "$c is not a valid core" + fi + done + echo $cpus } +read_interrups(){ + local verify=$1 + local tmpfile=$2 + + [ $verify -eq 0 ] && return + cat /proc/interrupts > $tmpfile +} + +verify_interrups(){ + + local verify=$1 + local tmo_start=$2 + local cpu=$3 + local tmpfile=$4 + + [ $verify -eq 0 ] && return 0 + + local tmo=$(date +%s) + tmo=$(expr 1 + $tmo - $tmo_start) + + local loc_timer_int_o=$(grep "Local timer interrupts" $tmpfile | awk -v cpu="$cpu" '{print $(cpu + 2)}') + local loc_timer_int=$(grep "Local timer interrupts" /proc/interrupts | awk -v cpu="$cpu" '{print $(cpu + 2)}') + local resched_int_o=$(grep "Rescheduling interrupts" $tmpfile | awk -v cpu="$cpu" '{print $(cpu + 2)}') + local resched_int=$(grep "Rescheduling interrupts" /proc/interrupts | awk -v cpu="$cpu" '{print $(cpu + 2)}') + + local diff_local=$(expr $loc_timer_int - $loc_timer_int_o) + local diff_reshed=$(expr $resched_int - $resched_int_o) + + if [ $diff_local -gt $tmo ]; then + echo "TEST_ISOLATION_CORE_${cpu}_1: FAIL">&2 + echo " Local timer interrupts on core $cpu greater than expected." >&2 + echo " expected max $tmo, got $diff_local ($loc_timer_int - $loc_timer_int_o)" + return 1 + else + if [ $diff_reshed -gt $tmo ]; then + echo "TEST_ISOLATION_CORE_${cpu}_2: FAIL">&2 + echo " Rescheduling interrupts on core $cpu greater than expected." 
>&2 + echo " expected max $tmo, got $diff_reshed ($resched_int - $resched_int_o)" + return 2 + fi + fi + + echo "TEST_ISOLATION_CORE_${cpu}_1: PASS" + echo "TEST_ISOLATION_CORE_${cpu}_2: PASS" + return 0 +} + enable_tracing() { [ $1 -eq 0 ] && return @@ -74,7 +143,7 @@ enable_tracing() { echo 1 > $DIR/events/timer/hrtimer_expire_entry/enable echo 1 > $DIR/events/timer/tick_stop/enable echo nop > $DIR/current_tracer - #To see the complete call grapf, enable the line below + # To see the complete call grapf, enable the line below #echo function_graph > $DIR/current_tracer echo 1 > $DIR/tracing_on @@ -98,13 +167,45 @@ dump_trace_data(){ dlog "Trace data dumped for CPUs: $ISOL_CPUS_ARRAY" } -isolate_and_run() { - local tracing=$1 - local cpus=$2 - local app="$3" - local base=$(dirname $0) +wait_app_started () { + local child=$1 + local ltasks=0 + while true; do + sleep 0.01 + kill -0 $child 2>/dev/null || break + tasks=$(ls /proc/$child/task | wc -l) + [ $tasks -eq $ltasks ] && break + ltasks=$tasks + done + dlog "app started, # threads:$ltasks" +} + +start_stress_procs() { + [ $1 -eq 0 ] && return + + local nr=$(cat /proc/cpuinfo | grep processor| wc -l) + $MY_STRESS_PATH -c $nr 2>&1 >/dev/null & + STRESS_PID=$! +} + +stop_stress_procs() { + [ $1 -eq 0 ] && return + + disown $STRESS_PID + kill -9 $STRESS_PID +} + +isolate_cpu(){ + local cpus=$1 + local base=$(dirname $0) $base/isolate-cpu.sh -c $cpus || die "$0 failed" +} + +run_application() { + local tracing=$1 + local verify=$2 + local app="$3" dlog "Starting application: $app" $app& @@ -116,17 +217,13 @@ isolate_and_run() { die "Failed to isolate task..." 
fi + start_stress_procs $verify + wait_app_started $child enable_tracing $tracing - local counter=$tracing - while [ "$counter" -gt "0" ]; do - kill -0 $child 2>/dev/null || break - sleep 1 - counter=$(expr $counter - 1) - done - + wait $child disable_tracing $tracing + stop_stress_procs $verify dump_trace_data $tracing - return 0 } check_prequesties() { @@ -137,6 +234,7 @@ check_prequesties() { if [ $tracing -ne 0 ]; then [ $UID -eq 0 ] || die "You need to be root!" [ -e /sys/kernel/debug/tracing ] || die "Kernel lacks tracing funtionality (ftrace)" + which stress > /dev/null 2>&1 || die "stress command not found, please install stress" fi } @@ -144,8 +242,11 @@ check_prequesties() { # Script entry point ## TRACING=0 +VERIFY=0 + ISOL_CPUS="1-$(($(getconf _NPROCESSORS_ONLN) - 1))" -while getopts hdc:t: arguments + +while getopts hdtvc: arguments do case $arguments in h) @@ -159,15 +260,18 @@ do ISOL_CPUS=$OPTARG ;; t) - TRACING=$OPTARG + TRACING=1 ;; + v) + VERIFY=1 + ;; *) print_usage exit 1 ;; esac done -#Remove all flags +# Remove all flags shift $((OPTIND-1)) if ! [ "$1" ]; then @@ -175,8 +279,29 @@ if ! [ "$1" ]; then exit 1 fi +# Isolate and optionally verify ISOL_CPUS_ARRAY=$(get_cpu_array $ISOL_CPUS) -app=$* +app="$*" check_prequesties $TRACING -isolate_and_run $TRACING $ISOL_CPUS "$app" + +MY_STRESS=stress-by-$$ +MY_STRESS_PATH=/tmp/$MY_STRESS +ln -sf $(which stress) $MY_STRESS_PATH + +tmp_file=$(mktemp /tmp/isolate_XXXXX) + +isolate_cpu $ISOL_CPUS + +read_interrups $VERIFY $tmp_file +TMO=$(date +%s) + +run_application $TRACING $VERIFY "$app" +ret=0 +for c in $ISOL_CPUS_ARRAY; do + verify_interrups $VERIFY $TMO $c $tmp_file + ret=$? + [ $ret -eq 0 ] || break +done + +exit $ret |