aboutsummaryrefslogtreecommitdiff
path: root/automated/android
diff options
context:
space:
mode:
author Karsten Tausche <karsten@fairphone.com> 2019-01-14 16:32:32 +0100
committer Chase Qi <chase.qi@linaro.org> 2019-03-14 10:38:11 +0800
commit dce10089766a0bf0a0224578013058cb76a3ea4e (patch)
tree 3d07f1aa08b3b21d482b2f19f5f2034838f956b0 /automated/android
parent 91c916c00747aa1fa52dbb5707cb5a5b24c0243e (diff)
MultiNode Tradefed: Reset userdata before retry
If supplied, flash a userdata image to the devices before triggering Tradefed retry. This helps reduce invalid failures that are caused by tests bringing devices into a state where other tests cannot pass. Issue: INFRA-137 Change-Id: I4a6510ceab6aec7ce530b9f8e244e9655dc22b81 Depends-On: Ie21cc417c78ec88da523f0d14ea85c1e9bcb912c Signed-off-by: Karsten Tausche <karsten@fairphone.com>
Diffstat (limited to 'automated/android')
-rwxr-xr-x automated/android/multinode/tradefed/tradefed-multinode.sh 5
-rw-r--r-- automated/android/multinode/tradefed/tradefed-multinode.yaml 5
-rwxr-xr-x automated/android/multinode/tradefed/tradefed-runner-multinode.py 26
-rw-r--r-- automated/android/multinode/tradefed/utils.py 72
-rwxr-xr-x automated/android/multinode/wait-and-keep-local-device-accessible.sh 34
-rw-r--r-- automated/android/multinode/wait-and-keep-local-device-accessible.yaml 7
6 files changed, 144 insertions, 5 deletions
diff --git a/automated/android/multinode/tradefed/tradefed-multinode.sh b/automated/android/multinode/tradefed/tradefed-multinode.sh
index 567b09a..39c24d6 100755
--- a/automated/android/multinode/tradefed/tradefed-multinode.sh
+++ b/automated/android/multinode/tradefed/tradefed-multinode.sh
@@ -32,11 +32,12 @@ $0 [-o timeout_secs] [ -m device_worker_mapping_file] [-c cts_url]
[-t test_params] [-u test_retry_params] [-i max_num_runs] [-n runs_if_unchanged]
[-p test_path] [-s state_check_frequency_secs] [-r <aggregated|atomic>]
[-f failures_printed] [-a <ap_ssid>] [-k <ap_key>] [-j <java_options>]
+[-b <userdata_image_file>]
heredoc
exit 1
}
-while getopts ':o:m:c:t:u:i:n:p:s:r:f:a:k:j:' opt; do
+while getopts ':o:m:c:t:u:i:n:p:s:r:f:a:k:j:b:' opt; do
case "${opt}" in
o) TIMEOUT_SECS="${OPTARG}" ;;
m) DEVICE_WORKER_MAPPING_FILE="${OPTARG}" ;;
@@ -52,6 +53,7 @@ while getopts ':o:m:c:t:u:i:n:p:s:r:f:a:k:j:' opt; do
a) AP_SSID="${OPTARG}" ;;
k) AP_KEY="${OPTARG}" ;;
j) JAVA_OPTIONS="${OPTARG}" ;;
+ b) USERDATA_IMAGE_FILE="${OPTARG}" ;;
*) usage ;;
esac
done
@@ -135,6 +137,7 @@ runner_exited_cleanly="pass"
./tradefed-runner-multinode.py -t "${TEST_PARAMS}" -u "${TEST_RETRY_PARAMS}" -i "${MAX_NUM_RUNS}" \
-n "${RUNS_IF_UNCHANGED}" -p "${TEST_PATH}" -s "${STATE_CHECK_FREQUENCY_SECS}" \
-r "${RESULT_FORMAT}" -f "${FAILURES_PRINTED}" -m "${DEVICE_WORKER_MAPPING_FILE}" \
+ --userdata_image_file "${USERDATA_IMAGE_FILE}" \
|| runner_exited_cleanly="fail"
# "fail" here means that an unexpected error/exception occurred in the runner.
diff --git a/automated/android/multinode/tradefed/tradefed-multinode.yaml b/automated/android/multinode/tradefed/tradefed-multinode.yaml
index 3e30a21..d6728df 100644
--- a/automated/android/multinode/tradefed/tradefed-multinode.yaml
+++ b/automated/android/multinode/tradefed/tradefed-multinode.yaml
@@ -51,6 +51,9 @@ params:
# For devices locally connected via USB, <device> the serial number of the
# device and <workerId> must be empty.
DEVICE_WORKER_MAPPING_FILE: "/tmp/deviceWorkerMapping"
+ # Userdata image file that will be used to reset devices to a clean state
+ # before starting TradeFed reruns.
+ USERDATA_IMAGE_FILE: ""
# Let the whole test run fail if the test runner failed to exit cleanly.
RAISE_ON_FAILURE: "true"
@@ -85,7 +88,7 @@ run:
-s "${STATE_CHECK_FREQUENCY_SECS}" -r "${RESULTS_FORMAT}" \
-m "${DEVICE_WORKER_MAPPING_FILE}" -f "${FAILURES_PRINTED}" \
-a "${AP_SSID}" -k "${AP_KEY}" -j "${JAVA_OPTIONS}" \
- || exec_result=$?
+ -b "${USERDATA_IMAGE_FILE}" || exec_result=$?
# Upload test log and result files to artifactorial.
- cp -r ./${TEST_PATH}/results ./output/ || true
- cp -r ./${TEST_PATH}/logs ./output/ || true
diff --git a/automated/android/multinode/tradefed/tradefed-runner-multinode.py b/automated/android/multinode/tradefed/tradefed-runner-multinode.py
index f862057..a28f5af 100755
--- a/automated/android/multinode/tradefed/tradefed-runner-multinode.py
+++ b/automated/android/multinode/tradefed/tradefed-runner-multinode.py
@@ -63,6 +63,10 @@ parser.add_argument('-f', dest='FAILURES_PRINTED', type=int,
required=False, default=0,
help="Specify the number of failed test cases to be\
printed, 0 means not print any failures.")
+parser.add_argument('--userdata_image_file', dest='USERDATA_IMAGE_FILE',
+ required=False, help="Userdata image file that will be \
+ used to reset devices to a clean state before starting \
+ TradeFed reruns.")
args = parser.parse_args()
@@ -91,7 +95,14 @@ try:
device_address = deviceToWorker[0]
worker_job_id = (None if (len(deviceToWorker) == 1 or not deviceToWorker[1])
else deviceToWorker[1])
- devices.append(Device(device_address, TRADEFED_LOGCAT % device_address, worker_job_id))
+ devices.append(
+ Device(
+ serial_or_address=device_address,
+ logcat_output_filename=TRADEFED_LOGCAT % device_address,
+ worker_job_id=worker_job_id,
+ userdata_image_file=args.USERDATA_IMAGE_FILE,
+ )
+ )
except OSError as e:
logger.error("Mapping file cannot be opened: %s" % args.DEVICE_WORKER_MAPPING_FILE)
sys.exit(1)
@@ -337,6 +348,19 @@ while child.isalive():
logger.info('NOT retrying TradeFed session as maximum number of retries is reached.')
else:
logger.info('Retrying with results of session %s' % tradefed_session_id)
+ logger.info('First resetting the devices to a clean state...')
+
+ unavailable_devices = []
+ for device in devices:
+ if not device.userdata_reset():
+ unavailable_devices += [device.serial_or_address]
+ if unavailable_devices:
+ logger.warning(
+ 'Following devices were not reset successfully '
+ 'or are not yet available again: %s'
+ % ', '.join(unavailable_devices)
+ )
+
try:
child.expect(prompt, timeout=60)
child.sendline('%s --retry %s' % (args.TEST_RETRY_PARAMS, str(tradefed_session_id)))
diff --git a/automated/android/multinode/tradefed/utils.py b/automated/android/multinode/tradefed/utils.py
index d470885..6fab4d3 100644
--- a/automated/android/multinode/tradefed/utils.py
+++ b/automated/android/multinode/tradefed/utils.py
@@ -1,9 +1,11 @@
import logging
+import os.path
import re
import shutil
import subprocess
import sys
import time
+from typing import Dict
sys.path.insert(0, "../../../lib/")
from py_util_lib import call_shell_lib # nopep8
@@ -16,7 +18,11 @@ class Device:
EXEC_IN_LAVA = shutil.which("lava-send") is not None
def __init__(
- self, serial_or_address, logcat_output_filename, worker_job_id=None
+ self,
+ serial_or_address,
+ logcat_output_filename,
+ worker_job_id=None,
+ userdata_image_file=None,
):
self.serial_or_address = serial_or_address
self.is_tcpip_device = bool(
@@ -29,6 +35,7 @@ class Device:
)
self.worker_job_id = worker_job_id
self.worker_handshake_iteration = 1
+ self.userdata_image_file = userdata_image_file
self._is_available = True
def ensure_available(self, logger, timeout_secs=30):
@@ -111,6 +118,10 @@ class Device:
# function will return failure, but the device can still become accessible in the next
# iteration of device availability checks.
+ # `fastboot devices` prints in some versions more debug information
+ # than `fastboot reboot`, e.g., missing udev rules.
+ subprocess.run(["fastboot", "devices"])
+
# There is no point in waiting longer for `fastboot reboot`:
fastbootRebootTimeoutSecs = 10
try:
@@ -123,6 +134,8 @@ class Device:
# failure.
pass
+ subprocess.run(["fastboot", "devices"])
+
bootTimeoutSecs = max(
10, int(reconnectTimeoutSecs) - fastbootRebootTimeoutSecs
)
@@ -149,6 +162,11 @@ class Device:
if not self.check_available():
return False
+
+ # Ensure that the device screen is on during test runs.
+ if not self._call_shell_lib("disable_suspend"):
+ print("WARNING: Disabling device suspend may have failed.")
+
# reestablish logcat connection
self.logcat.kill()
self.logcat = subprocess.Popen(
@@ -157,6 +175,58 @@ class Device:
)
return True
+    def userdata_reset(self, commandTimeoutSecs=60, reconnectTimeoutSecs=900):
+        """Reset the device to a clean state. This is equivalent to resetting to
+        factory settings and applying CTS set-up steps.
+
+        The userdata partition is reflashed from ``self.userdata_image_file``
+        and the device is reconnected afterwards. For TCP/IP devices the
+        reflash is delegated to the worker via a "userdata_reset" handshake;
+        locally connected devices are rebooted into the bootloader and flashed
+        with ``fastboot``.
+
+        Args:
+            commandTimeoutSecs: Timeout in seconds applied to each of the
+                ``adb reboot bootloader`` and ``fastboot flash`` commands.
+            reconnectTimeoutSecs: Timeout in seconds passed on to
+                ``try_reconnect``.
+
+        Returns:
+            True if the reset was skipped (no image configured or image file
+            missing) or if the device was flashed and reconnected
+            successfully; False otherwise.
+        """
+        if not self.userdata_image_file:
+            print("WARNING: Skipping userdata_reset; no image file provided.")
+            return True
+        if not os.path.isfile(self.userdata_image_file):
+            print(
+                "WARNING: Skipping userdata_reset; image file not found: %s"
+                % self.userdata_image_file
+            )
+            # Actually skip, as the warning says. Without this return the
+            # device would be rebooted into the bootloader and fastboot would
+            # be asked to flash a file that does not exist.
+            return True
+
+        print("Resetting userdata partition on %s" % self.serial_or_address)
+
+        # Reflash the userdata partition.
+        flash_succeeded = True
+        if self.is_tcpip_device:
+            self.worker_handshake("userdata_reset")
+        else:
+            try:
+                subprocess.run(
+                    [
+                        "adb",
+                        "-s",
+                        self.serial_or_address,
+                        "reboot",
+                        "bootloader",
+                    ],
+                    timeout=commandTimeoutSecs,
+                )
+            except subprocess.TimeoutExpired:
+                # Blocking `adb reboot` does not necessarily indicate a failure.
+                pass
+            try:
+                flash_result = subprocess.run(
+                    [
+                        "fastboot",
+                        "-s",
+                        self.serial_or_address,
+                        "flash",
+                        "userdata",
+                        self.userdata_image_file,
+                    ],
+                    timeout=commandTimeoutSecs,
+                )
+            except subprocess.TimeoutExpired as e:
+                print(e)
+                return False
+            if flash_result.returncode != 0:
+                # Remember the failure, but still try to reconnect below so
+                # that the device is not left sitting in the bootloader.
+                print(
+                    "WARNING: Flashing userdata image failed on %s"
+                    % self.serial_or_address
+                )
+                flash_succeeded = False
+
+        # Reconnect as usual.
+        if not self.try_reconnect(reconnectTimeoutSecs=reconnectTimeoutSecs):
+            return False
+
+        # Report success explicitly: callers test the return value for
+        # truthiness, so an implicit None would be treated as a failure.
+        return flash_succeeded
+
def release(self):
self.logcat.kill()
self.logcat_output_file.close()
diff --git a/automated/android/multinode/wait-and-keep-local-device-accessible.sh b/automated/android/multinode/wait-and-keep-local-device-accessible.sh
index 6776436..aee98f2 100755
--- a/automated/android/multinode/wait-and-keep-local-device-accessible.sh
+++ b/automated/android/multinode/wait-and-keep-local-device-accessible.sh
@@ -14,6 +14,7 @@ NETWORK_TIMEOUT_SECS=${NETWORK_TIMEOUT_SECS:-300}
ADB_TCPIP_ATTEMPTS=${ADB_TCPIP_ATTEMPTS:-5}
ADB_CONNECT_TEST_TIMEOUT_SECS=${ADB_CONNECT_TEST_TIMEOUT_SECS:-60}
ANDROID_ENABLE_WIFI=${ANDROID_ENABLE_WIFI:-true}
+USERDATA_IMAGE_FILE=${USERDATA_IMAGE_FILE:-""}
# shellcheck source=automated/lib/sh-test-lib
. "${MY_AUTOMATED_DIR}/lib/sh-test-lib"
@@ -64,6 +65,34 @@ settings; UI automation failed."
fi
}
+# Reflash the userdata partition of the local device from USERDATA_IMAGE_FILE.
+# The reset is skipped with a warning if USERDATA_IMAGE_FILE is empty or does
+# not name an existing regular file. While the reset is in progress RESULT is
+# set to "false"; the previous value is restored only if every step succeeds,
+# so any early return after that point leaves RESULT="false".
+# NOTE(review): RESULT is presumably evaluated by code outside this function
+# (e.g. reconnect/result reporting) - confirm against the rest of the script.
+userdata_reset() {
+ if [ -z "${USERDATA_IMAGE_FILE}" ]; then
+ warn_msg "Skipping userdata_reset; no image file provided."
+ return
+ fi
+ if [ ! -f "${USERDATA_IMAGE_FILE}" ]; then
+ warn_msg "Skipping userdata_reset; image file not found: \
+${USERDATA_IMAGE_FILE}."
+ return
+ fi
+ # Save the current RESULT and assume failure until all steps have passed.
+ # shellcheck disable=SC2039
+ local previousResult="${RESULT}"
+ RESULT=false
+ # Bound the reboot command so that a hanging adb cannot block the worker.
+ if ! timeout "${ADB_CONNECT_TEST_TIMEOUT_SECS}" adb reboot bootloader; then
+ warn_msg "Reboot into bootloader failed."
+ return
+ fi
+ # Note: the flash itself runs without a timeout.
+ if ! fastboot flash userdata "${USERDATA_IMAGE_FILE}"; then
+ warn_msg "Flashing userdata image failed."
+ return
+ fi
+ # Leave the bootloader again; give `fastboot reboot` at most 10 seconds.
+ if ! timeout 10 fastboot reboot; then
+ warn_msg "Device did not reboot from fastboot as expected."
+ return
+ fi
+ # All steps succeeded: restore the RESULT value from before the reset.
+ RESULT="${previousResult}"
+}
+
lava-test-set start keepAlive
@@ -89,6 +118,11 @@ while true; do
adb devices || true
reconnect_device
;;
+ userdata_reset)
+ info_msg "Userdata reset requested by master."
+ userdata_reset
+ reconnect_device
+ ;;
*)
lava-test-raise "Script error. Unexpected message from master to \
worker, command=${command}"
diff --git a/automated/android/multinode/wait-and-keep-local-device-accessible.yaml b/automated/android/multinode/wait-and-keep-local-device-accessible.yaml
index 35e3cad..720fedf 100644
--- a/automated/android/multinode/wait-and-keep-local-device-accessible.yaml
+++ b/automated/android/multinode/wait-and-keep-local-device-accessible.yaml
@@ -21,9 +21,14 @@ params:
ADB_TCPIP_ATTEMPTS: "5"
ADB_CONNECT_TEST_TIMEOUT_SECS: "60"
ANDROID_ENABLE_WIFI: "true"
+ # Userdata image file that will be used to reset devices to a clean state
+ # before starting TradeFed reruns.
+ USERDATA_IMAGE_FILE: ""
run:
steps:
- lava-install-packages --no-install-recommends python3-pip python3-setuptools python3-wheel
- pip3 install -q uiautomator
- - ./automated/android/multinode/wait-and-keep-local-device-accessible.sh
+ - |
+ USERDATA_IMAGE_FILE="${USERDATA_IMAGE_FILE}" \
+ ./automated/android/multinode/wait-and-keep-local-device-accessible.sh