aboutsummaryrefslogtreecommitdiff
path: root/bigtop-test-framework
diff options
context:
space:
mode:
authorMikhail Antonov <olorinbant@gmail.com>2014-02-03 15:19:21 -0800
committerKonstantin Boudnik <cos@apache.org>2014-02-03 15:19:21 -0800
commit40b398553ff4e00b68548e219aa58c3a8fec2232 (patch)
tree79feb0eec91e2abde3b21a643daefd48eeffd1ff /bigtop-test-framework
parent553a6ca128dbb62862a0d456f765250dac7b8371 (diff)
BIGTOP-1192. Add utilities to facilitate cluster failure testing into bigtop-test-framework
Diffstat (limited to 'bigtop-test-framework')
-rw-r--r--bigtop-test-framework/README40
-rw-r--r--bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/AbstractFailure.groovy179
-rw-r--r--bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/FailureConstants.groovy38
-rw-r--r--bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/NetworkShutdownFailure.groovy78
-rw-r--r--bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/ServiceKilledFailure.groovy69
-rw-r--r--bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/ServiceRestartFailure.groovy69
-rw-r--r--bigtop-test-framework/src/test/groovy/org/apache/bigtop/itest/failures/ClusterFailuresTest.groovy153
7 files changed, 626 insertions, 0 deletions
diff --git a/bigtop-test-framework/README b/bigtop-test-framework/README
index 009972ce..0980a6a8 100644
--- a/bigtop-test-framework/README
+++ b/bigtop-test-framework/README
@@ -25,3 +25,43 @@ tasks via specific adapters:
This project contains the experimental foundation of JarRunner and a
meta-service utilizing it to perform tests execution. These two components
might be joined together later.
+
+New cluster failures injection capabilities for smoke tests (see BIGTOP-1192 for details)
+imposed certain additional requirements. Smoke tests running cluster failures against real clusters
+expect the following preconditions to be satisfied:
+ - For all hosts in the cluster being smoke-tested there's a dedicated user(like "jenkins" or "bigtop"),
+ which has password-less SSH access to those hosts and permissions to execute certain sudo commands
+ (see below) without password
+ - 2 new environment variables are set:
+ * BIGTOP_SMOKES_USER should contain username of user which will be used to run SSH commands
+ * BIGTOP_SMOKES_CLUSTER_IDENTITY_FILE should point to a file with private key for password-less SSH.
+
+To be able to run new itest framework internal test (see ClusterFailuresTest.groovy), the following change
+is needed on the machine with Bigtop workspace:
+ - if your current user doesn't have password-less sudo, then run 'visudo' and:
+ * add line in /etc/sudoers at the end of file like: (for Redhat):
+ myusername localhost = NOPASSWD:/usr/sbin/service,/usr/bin/pkill,/usr/sbin/iptables
+
+To be able to run real module smoke tests against a real cluster, more complex setup is needed
+on the machine with Bigtop workspace:
+ - make sure sshd is running on all nodes in the cluster
+ - check the following in /etc/ssh/sshd_config:
+ * PubkeyAuthentication yes
+ * PasswordAuthentication yes
+ - add new user, for example, "bigtop", on local machine AND on each cluster node, set some password,
+ make sure default shell is set
+ - on local machine, su bigtop, do ssh-keygen with empty passphrase
+ - run 'ssh-copy-id bigtop@<each cluster node>'
+ - log back in on your local machine as your regular user which you use to work with your workspace, copy the generated private
+ key for bigtop user somewhere, do chown and make sure it has right permissions (like 600)
+ - export BIGTOP_SMOKES_CLUSTER_IDENTITY_FILE=/full/path/to/private/key
+ - export BIGTOP_SMOKES_USER=bigtop
+ - on each remote node, run 'visudo' and
+ * add line in /etc/sudoers at the end of file like: (for Redhat):
+ bigtop localhost = NOPASSWD:/usr/sbin/service,/usr/bin/pkill,/usr/sbin/iptables
+ * Comment out line 'Defaults requiretty', otherwise sudo may complain like
+ "Sorry, you must have a tty to run sudo"
+
+ - run following sample commands from your local machine to verify your setup:
+ * ssh -i /test_bigtop_ssh_key bigtop@<some cluster node> sudo service crond stop
+ * ssh -i /test_bigtop_ssh_key bigtop@<some cluster node> sudo service crond start
diff --git a/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/AbstractFailure.groovy b/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/AbstractFailure.groovy
new file mode 100644
index 00000000..d4f117d0
--- /dev/null
+++ b/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/AbstractFailure.groovy
@@ -0,0 +1,179 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bigtop.itest.failures
+
+import org.apache.bigtop.itest.shell.Shell
+import static org.apache.bigtop.itest.LogErrorsUtils.logError
+import static org.apache.bigtop.itest.failures.FailureConstants.PRIVATE_KEY_PATH_ENV_VAR
+import static org.apache.bigtop.itest.failures.FailureConstants.BIGTOP_SMOKES_USER
+
+/**
+ * Abstract class to be subclassed by cluster failures classes of various types:
+ *  - service restart
+ *  - service being killed (kill -9)
+ *  - network shutdown (iptables-based drop).
+ *
+ * Provides means to:
+ *  - run set of "failure" commands against the specified list of hosts
+ *  - restore the correct state.
+ *
+ * A failure is a {@link Runnable}: it optionally waits for a start delay, runs the
+ * failure commands, then idles until its thread is interrupted, at which point the
+ * restore commands are executed.
+ *
+ * Please see examples of usage in test class ClusterFailuresTest.
+ *
+ * WARNING:
+ *  - password-less (PKI-based) SSH for user specified in env variable BIGTOP_SMOKES_USER
+ *    to all nodes in cluster being tested is assumed
+ *  - for local tests, like ClusterFailuresTest, this SSH should be setup for localhost
+ *  - env variable BIGTOP_SMOKES_CLUSTER_IDENTITY_FILE should point to according private key file.
+ */
+public abstract class AbstractFailure implements Runnable {
+  /**
+   * Shell used to execute every fail/restore command. It is static, i.e. shared by
+   * all failure instances.
+   */
+  protected static Shell rootShell = new Shell("/bin/bash", "root")
+
+  /**
+   * Used to wrap actual command to be executed over SSH, if running in distributed setup.
+   * First substitution param is path to SSH private key, second - remote server username,
+   * third - remote server host address, fourth - actual command being wrapped.
+   */
+  protected static String SSH_COMMAND_WRAPPER = "ssh -i %s -o StrictHostKeyChecking=no %s@%s '%s'"
+
+  /**
+   * List of hosts to run fail/restore commands against.
+   */
+  protected List<String> hosts = []
+
+  /**
+   * List of failing commands, defined by a subclass, executed in given sequence.
+   */
+  protected List<String> failCommands = []
+
+  /**
+   * List of restore commands, defined by a subclass, executed in given sequence.
+   */
+  protected List<String> restoreCommands = []
+
+  /**
+   * How long (in milliseconds) shall we wait before executing first failure.
+   */
+  protected long startDelay = 0
+
+  /**
+   * How long (in milliseconds) failure thread waits before next check if failure is over
+   * and it should call restore commands.
+   */
+  private static final long SLEEP_TIME = 100
+
+  /**
+   * Simple constructor for failures, uses default values.
+   * @param hosts list of hosts this failure will be executed on.
+   */
+  public AbstractFailure(List<String> hosts) {
+    this.hosts = hosts
+  }
+
+  /**
+   * Constructor allowing to set all params.
+   *
+   * @param hosts list of hosts the failure will be running against
+   * @param startDelay how long (in millisecs) failure will wait before starting
+   */
+  public AbstractFailure(List<String> hosts, long startDelay) {
+    this.hosts = hosts
+    this.startDelay = startDelay
+  }
+
+  /**
+   * Runs failure/restore commands; intended to be executed in a separate thread.
+   *
+   * Sequence: wait for {@code startDelay} (if positive), run the failure commands,
+   * then poll every {@code SLEEP_TIME} milliseconds until the thread is interrupted.
+   * Restore commands are executed on the way out, but only if the failure phase was
+   * actually entered: when the thread is interrupted during the start delay nothing
+   * has been broken yet, so there is nothing to restore (and restore commands such as
+   * deleting a rule that was never added would fail their return-code assertion).
+   */
+  @Override
+  public void run() {
+    // Flipped right before the first failure command, so a partially applied
+    // failure is still rolled back.
+    boolean failureStarted = false
+
+    try {
+      if (startDelay > 0) {
+        try {
+          Thread.sleep(startDelay)
+        } catch (InterruptedException e) {
+          // Cancelled before the failure was injected: keep the interrupt flag and quit.
+          Thread.currentThread().interrupt()
+          return
+        }
+      }
+
+      failureStarted = true
+      runFailCommands()
+
+      while (!Thread.currentThread().isInterrupted()) {
+        try {
+          Thread.sleep(SLEEP_TIME)
+        } catch (InterruptedException e) {
+          // Interruption is the signal that the failure is over.
+          return
+        }
+      }
+    } finally {
+      if (failureStarted) {
+        runRestoreCommands()
+      }
+    }
+  }
+
+  /**
+   * Executes all restore commands in order; every command is expected to return 0.
+   */
+  private void runRestoreCommands() {
+    restoreCommands.each {
+      rootShell.exec(it)
+      logError(rootShell)
+      assert rootShell.getRet() == 0, "Restore command $it has returned non-0 error code:"
+    }
+  }
+
+  /**
+   * Executes all failure commands in order.
+   */
+  private void runFailCommands() {
+    failCommands.each {
+      rootShell.exec(it)
+      logError(rootShell)
+
+      // The return code is deliberately not asserted here:
+      // some commands, like pkill over ssh, return 137. It's ok.
+    }
+  }
+
+  /**
+   * Reads the full path to private key file from env. variable PRIVATE_KEY_PATH_ENV_VAR.
+   * Fails an assertion if the variable is unset or empty.
+   *
+   * @return full path to file with private key for SSH connections to cluster.
+   */
+  protected String getIdentityFile() {
+    String identityFile = System.getenv(PRIVATE_KEY_PATH_ENV_VAR)
+    assert identityFile, "Env variable $PRIVATE_KEY_PATH_ENV_VAR is not set:"
+    return identityFile
+  }
+
+  /**
+   * Reads the username used for ssh commands from env. variable BIGTOP_SMOKES_USER.
+   * Fails an assertion if the variable is unset or empty.
+   *
+   * @return user which will be used to run SSH command on target hosts
+   */
+  protected String getSshUser() {
+    String sshUser = System.getenv(BIGTOP_SMOKES_USER)
+    assert sshUser, "Env variable $BIGTOP_SMOKES_USER is not set:"
+    return sshUser
+  }
+
+  /**
+   * If tests are running in distributed mode, i.e. not itest framework tests,
+   * but real cluster smoke tests, wrapping failure command to go over SSH to node on the cluster.
+   *
+   * The command is placed between single quotes in SSH_COMMAND_WRAPPER as is.
+   *
+   * @param formattedCommand actual failure command to be executed on the remote node
+   * @param host remote node to run command on
+   * @return full command to be executed in the local shell
+   */
+  protected String getSshWrappedCommand(String formattedCommand, String host) {
+    def identityFile = getIdentityFile()
+    def sshUser = getSshUser()
+
+    return String.format(SSH_COMMAND_WRAPPER, identityFile, sshUser, host, formattedCommand)
+  }
+}
diff --git a/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/FailureConstants.groovy b/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/FailureConstants.groovy
new file mode 100644
index 00000000..0c246815
--- /dev/null
+++ b/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/FailureConstants.groovy
@@ -0,0 +1,38 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bigtop.itest.failures
+
+/**
+ * Constants for cluster failure smoke tests: names of the environment variables
+ * the failure classes read their SSH settings from.
+ */
+public final class FailureConstants {
+
+  /**
+   * Env variable which should contain full local path to the file with SSH private key
+   * used to remotely login on cluster nodes without password.
+   */
+  public static final String PRIVATE_KEY_PATH_ENV_VAR = "BIGTOP_SMOKES_CLUSTER_IDENTITY_FILE"
+
+  /**
+   * Env variable which should contain name of Linux user on the hosts where failures are running,
+   * this user should have password-less SSH enabled and privileges to run password-less sudo
+   * commands: service stop/start, pkill -9, iptables rules editing.
+   */
+  public static final String BIGTOP_SMOKES_USER = "BIGTOP_SMOKES_USER"
+
+  /** Constants holder, not meant to be instantiated. */
+  private FailureConstants() {
+  }
+}
diff --git a/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/NetworkShutdownFailure.groovy b/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/NetworkShutdownFailure.groovy
new file mode 100644
index 00000000..15bf7970
--- /dev/null
+++ b/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/NetworkShutdownFailure.groovy
@@ -0,0 +1,78 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bigtop.itest.failures
+
+/**
+ * Failure which cuts network connections between specified hosts during tests execution
+ * by adding iptables DROP rules, and removes those rules again on restore.
+ */
+public class NetworkShutdownFailure extends AbstractFailure {
+
+  private static final String DROP_INPUT_CONNECTIONS = "sudo iptables -A INPUT -s %s -j DROP"
+  private static final String DROP_OUTPUT_CONNECTIONS = "sudo iptables -A OUTPUT -d %s -j DROP"
+  private static final String RESTORE_INPUT_CONNECTIONS = "sudo iptables -D INPUT -s %s -j DROP"
+  private static final String RESTORE_OUTPUT_CONNECTIONS = "sudo iptables -D OUTPUT -d %s -j DROP"
+
+  /**
+   * Creates list of network disruptions between specified hosts.
+   *
+   * @param srcHost host whose connections will be cut
+   * @param dstHosts destination hosts, connections to which from srcHost will be shut down.
+   */
+  public NetworkShutdownFailure(String srcHost, List<String> dstHosts) {
+    super(new ArrayList<String>())
+    populateCommandsList(srcHost, dstHosts)
+  }
+
+  /**
+   * Creates list of network disruptions between specified hosts,
+   * allows to set all additional params.
+   *
+   * @param srcHost host whose connections will be cut
+   * @param dstHosts destination hosts, connections to which from srcHost will be shut down
+   * @param startDelay time (in milliseconds) the failure will wait before start
+   */
+  public NetworkShutdownFailure(String srcHost,
+                                List<String> dstHosts,
+                                long startDelay) {
+
+    super(new ArrayList<String>(), startDelay)
+    populateCommandsList(srcHost, dstHosts)
+  }
+
+  /*
+   * Fills failCommands/restoreCommands with one INPUT and one OUTPUT rule per destination.
+   * Commands are used as is when the source host is "localhost" (case-insensitive),
+   * otherwise each of them is wrapped to be executed on the source host over SSH.
+   */
+  private void populateCommandsList(String host, List<String> dstHosts) {
+    boolean runLocally = "localhost".equalsIgnoreCase(host)
+
+    dstHosts.each { target ->
+      String dropInput = String.format(DROP_INPUT_CONNECTIONS, target)
+      String dropOutput = String.format(DROP_OUTPUT_CONNECTIONS, target)
+      String restoreInput = String.format(RESTORE_INPUT_CONNECTIONS, target)
+      String restoreOutput = String.format(RESTORE_OUTPUT_CONNECTIONS, target)
+
+      failCommands.add(runLocally ? dropInput : getSshWrappedCommand(dropInput, host))
+      failCommands.add(runLocally ? dropOutput : getSshWrappedCommand(dropOutput, host))
+      restoreCommands.add(runLocally ? restoreInput : getSshWrappedCommand(restoreInput, host))
+      restoreCommands.add(runLocally ? restoreOutput : getSshWrappedCommand(restoreOutput, host))
+    }
+  }
+}
diff --git a/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/ServiceKilledFailure.groovy b/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/ServiceKilledFailure.groovy
new file mode 100644
index 00000000..413f1713
--- /dev/null
+++ b/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/ServiceKilledFailure.groovy
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bigtop.itest.failures
+
+/**
+ * Failure which kills (with pkill -9) specified service on specified hosts during tests run
+ * and starts the service again on restore.
+ */
+public class ServiceKilledFailure extends AbstractFailure {
+
+  private static final String KILL_SERVICE_TEMPLATE = "sudo pkill -9 -f %s"
+  private static final String START_SERVICE_TEMPLATE = "sudo service %s start"
+
+  /**
+   * Creates failure killing specified service on specified hosts, without start delay.
+   *
+   * @param hosts list of hosts on which specified service will be killed
+   * @param serviceName name of service to be killed.
+   */
+  public ServiceKilledFailure(List<String> hosts, String serviceName) {
+    super(hosts)
+    populateCommandsList(hosts, serviceName)
+  }
+
+  /**
+   * Creates failure killing specified service on specified hosts after given delay.
+   *
+   * @param hosts list of hosts on which specified service will be killed
+   * @param serviceName name of service to be killed
+   * @param startDelay time (in milliseconds) the failure will wait before start
+   */
+  public ServiceKilledFailure(List<String> hosts,
+                              String serviceName,
+                              long startDelay) {
+
+    super(hosts, startDelay)
+    populateCommandsList(hosts, serviceName)
+  }
+
+  /*
+   * Fills failCommands/restoreCommands. When the only host is "localhost"
+   * (case-insensitive) plain commands are used; in every other case one
+   * SSH-wrapped kill/start pair is added per host.
+   */
+  private void populateCommandsList(List<String> hosts, String serviceName) {
+    String killCommand = String.format(KILL_SERVICE_TEMPLATE, serviceName)
+    String startCommand = String.format(START_SERVICE_TEMPLATE, serviceName)
+
+    boolean localOnly = hosts.size() == 1 && "localhost".equalsIgnoreCase(hosts[0])
+    if (localOnly) {
+      failCommands.add(killCommand)
+      restoreCommands.add(startCommand)
+      return
+    }
+
+    for (String remoteHost : hosts) {
+      failCommands.add(getSshWrappedCommand(killCommand, remoteHost))
+      restoreCommands.add(getSshWrappedCommand(startCommand, remoteHost))
+    }
+  }
+}
diff --git a/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/ServiceRestartFailure.groovy b/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/ServiceRestartFailure.groovy
new file mode 100644
index 00000000..6dd1005c
--- /dev/null
+++ b/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/ServiceRestartFailure.groovy
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bigtop.itest.failures
+
+/**
+ * Failure which stops specified service (via "service stop") on specified hosts during
+ * tests execution and starts it again on restore.
+ */
+public class ServiceRestartFailure extends AbstractFailure {
+
+  private static final String STOP_SERVICE_TEMPLATE = "sudo service %s stop"
+  private static final String START_SERVICE_TEMPLATE = "sudo service %s start"
+
+  /**
+   * Creates failure restarting specified service on specified hosts, without start delay.
+   *
+   * @param hosts list of hosts on which specified service will be restarted
+   * @param serviceName name of service to be restarted.
+   */
+  public ServiceRestartFailure(List<String> hosts, String serviceName) {
+    super(hosts)
+    populateCommandsList(hosts, serviceName)
+  }
+
+  /**
+   * Creates failure restarting specified service on specified hosts after given delay.
+   *
+   * @param hosts list of hosts on which specified service will be restarted
+   * @param serviceName name of service to be restarted
+   * @param startDelay time (in milliseconds) the failure will wait before start
+   */
+  public ServiceRestartFailure(List<String> hosts,
+                               String serviceName,
+                               long startDelay) {
+
+    super(hosts, startDelay)
+    populateCommandsList(hosts, serviceName)
+  }
+
+  /*
+   * Fills failCommands/restoreCommands. Plain stop/start commands are used when the
+   * host list consists of "localhost" only (case-insensitive); otherwise every host
+   * gets its own stop/start pair wrapped into an SSH call.
+   */
+  private void populateCommandsList(List<String> hosts, String serviceName) {
+    String stopCommand = String.format(STOP_SERVICE_TEMPLATE, serviceName)
+    String startCommand = String.format(START_SERVICE_TEMPLATE, serviceName)
+
+    boolean remote = !(hosts.size() == 1 && "localhost".equalsIgnoreCase(hosts[0]))
+    if (remote) {
+      for (String node : hosts) {
+        failCommands.add(getSshWrappedCommand(stopCommand, node))
+        restoreCommands.add(getSshWrappedCommand(startCommand, node))
+      }
+    } else {
+      failCommands.add(stopCommand)
+      restoreCommands.add(startCommand)
+    }
+  }
+}
diff --git a/bigtop-test-framework/src/test/groovy/org/apache/bigtop/itest/failures/ClusterFailuresTest.groovy b/bigtop-test-framework/src/test/groovy/org/apache/bigtop/itest/failures/ClusterFailuresTest.groovy
new file mode 100644
index 00000000..af6b36b7
--- /dev/null
+++ b/bigtop-test-framework/src/test/groovy/org/apache/bigtop/itest/failures/ClusterFailuresTest.groovy
@@ -0,0 +1,153 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bigtop.itest.failures
+
+import org.apache.bigtop.itest.shell.OS
+import org.junit.Test
+import org.apache.bigtop.itest.shell.Shell
+
+/**
+ * Exercises the failure classes against the local machine: restarts and kills the cron
+ * service and blocks traffic to a remote host, checking each time that the failure takes
+ * effect and that the state is restored once the failure thread is interrupted.
+ */
+public class ClusterFailuresTest {
+  /**
+   * Upper bound (in milliseconds) for any single wait on a state change, so that a
+   * misconfigured machine makes the test fail instead of polling forever.
+   */
+  private static final long WAIT_TIMEOUT = 60000
+
+  private Shell rootShell = new Shell("/bin/bash", "root")
+  private final int SLEEP_TIME = 100
+  private final String TEST_HOST = "localhost"
+  private final String TEST_REMOTE_HOST = "apache.org"
+  private final String CRON_SERVICE
+
+  {
+    // The cron service is named differently depending on the Linux flavor.
+    switch (OS.linux_flavor) {
+      case ~/(?is).*(redhat|centos|rhel|fedora|enterpriseenterpriseserver).*/:
+        CRON_SERVICE = "crond"
+        break
+      default:
+        CRON_SERVICE = "cron"
+    }
+  }
+
+  @Test
+  void testServiceRestart() {
+    startCron()
+    assert isCronRunning(), "$CRON_SERVICE service isn't running before the test:"
+
+    def cronKilled = new ServiceRestartFailure([TEST_HOST], "$CRON_SERVICE")
+    Thread t = new Thread(cronKilled)
+    t.start()
+
+    // The wait is inside the try block so the failure thread is always interrupted
+    // (and the service restored), even when the wait times out.
+    try {
+      waitWhile("$CRON_SERVICE it still running") { isCronRunning() }
+      assert !isCronRunning(), "$CRON_SERVICE hasn't been stopped as expected:"
+      println "$CRON_SERVICE stopped. Good."
+    } finally {
+      t.interrupt()
+    }
+
+    waitWhile("$CRON_SERVICE it still stopped..") { !isCronRunning() }
+
+    assert isCronRunning(), "$CRON_SERVICE hasn't been restarted after the test:"
+    println "$CRON_SERVICE is up. Good"
+  }
+
+  @Test
+  void testServiceKilled() {
+    // On Ubuntu services like cron or ssh get restarted automatically if killed,
+    // so for now disabling this test for Ubuntu users.
+    if (OS.linux_flavor ==~ /(?is).*(ubuntu|debian).*/) {
+      println "As you're running on $OS.linux_flavor, testServiceKilled() doesn't run for you."
+      return
+    }
+
+    startCron()
+    assert isCronRunning(), "$CRON_SERVICE service isn't running before the test:"
+
+    def cronKilled = new ServiceKilledFailure([TEST_HOST], "$CRON_SERVICE")
+    Thread t = new Thread(cronKilled)
+    t.start()
+
+    try {
+      waitWhile("$CRON_SERVICE it still running..") { isCronRunning() }
+      assert !isCronRunning(), "$CRON_SERVICE hasn't been killed as expected:"
+      println "$CRON_SERVICE killed. Good."
+    } finally {
+      t.interrupt()
+    }
+
+    waitWhile("$CRON_SERVICE it still killed...") { !isCronRunning() }
+
+    assert isCronRunning(), "$CRON_SERVICE hasn't been restarted after the test:"
+    println "$CRON_SERVICE is up. Good."
+  }
+
+  @Test
+  void testNetworkShutdown() {
+    //make sure there are no blocking rules
+    rootShell.exec("iptables -D INPUT -s $TEST_REMOTE_HOST -j DROP")
+    rootShell.exec("iptables -D OUTPUT -d $TEST_REMOTE_HOST -j DROP")
+
+    assert isRemoteHostReachable(), "No ping to $TEST_REMOTE_HOST, which is used for network failures test:"
+
+    def networkShutdown = new NetworkShutdownFailure(TEST_HOST, [TEST_REMOTE_HOST])
+    Thread t = new Thread(networkShutdown)
+    t.start()
+
+    try {
+      waitWhile("$TEST_REMOTE_HOST is still reachable...") { isRemoteHostReachable() }
+      assert !isRemoteHostReachable(), "Connection to $TEST_REMOTE_HOST hasn't been killed as expected:"
+      println "$TEST_REMOTE_HOST isn't reachable. Good."
+    } finally {
+      t.interrupt()
+    }
+
+    waitWhile("$TEST_REMOTE_HOST isn't reachable...") { !isRemoteHostReachable() }
+
+    assert isRemoteHostReachable(), "Connection to $TEST_REMOTE_HOST hasn't been restored after the test:"
+    println "$TEST_REMOTE_HOST is reachable again. Good."
+  }
+
+  /**
+   * Polls the given condition every SLEEP_TIME milliseconds for as long as it holds,
+   * printing the progress message on each iteration.
+   *
+   * @param progressMessage message printed while the condition still holds
+   * @param condition closure evaluated before every iteration; waiting ends once it is false
+   * @throws AssertionError if the condition still holds after WAIT_TIMEOUT milliseconds
+   */
+  private void waitWhile(String progressMessage, Closure condition) {
+    long deadline = System.currentTimeMillis() + WAIT_TIMEOUT
+    while (condition.call()) {
+      assert System.currentTimeMillis() < deadline, "Gave up waiting after $WAIT_TIMEOUT ms: $progressMessage"
+      println progressMessage
+      Thread.sleep(SLEEP_TIME)
+    }
+  }
+
+  /** @return true if pgrep finds a process matching the cron service name. */
+  private boolean isCronRunning() {
+    return rootShell.exec("pgrep $CRON_SERVICE").ret == 0
+  }
+
+  /** Starts the cron service; the return code is not checked here. */
+  private void startCron() {
+    rootShell.exec("service $CRON_SERVICE start")
+  }
+
+  /** @return true if a single ping to the remote test host succeeds. */
+  private boolean isRemoteHostReachable() {
+    return rootShell.exec("ping -qc 1 $TEST_REMOTE_HOST").ret == 0
+  }
+}