diff options
Diffstat (limited to 'bigtop-test-framework/src/main/groovy/org/apache')
5 files changed, 433 insertions, 0 deletions
diff --git a/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/AbstractFailure.groovy b/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/AbstractFailure.groovy
new file mode 100644
index 00000000..d4f117d0
--- /dev/null
+++ b/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/AbstractFailure.groovy
@@ -0,0 +1,179 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bigtop.itest.failures
+
+import org.apache.bigtop.itest.shell.Shell
+import static org.apache.bigtop.itest.LogErrorsUtils.logError
+import static org.apache.bigtop.itest.failures.FailureConstants.PRIVATE_KEY_PATH_ENV_VAR
+import static org.apache.bigtop.itest.failures.FailureConstants.BIGTOP_SMOKES_USER
+
+/**
+ * Abstract class to be subclassed by cluster failures classes of various types:
+ *  - service restart
+ *  - service being killed (kill -9)
+ *  - network shutdown (iptables-based drop).
+ *
+ * Provides means to:
+ *  - run set of "failure" commands against the specified list of hosts
+ *  - restore the correct state.
+ *
+ * Please see examples of usage in test class ClusterFailuresTest.
+ *
+ * WARNING:
+ *  - password-less (PKI-based) SSH for user specified in env variable BIGTOP_SMOKES_USER
+ *    to all nodes in cluster being tested is assumed
+ *  - for local tests, like ClusterFailuresTest, this SSH should be setup for localhost
+ *  - env variable BIGTOP_SMOKES_CLUSTER_IDENTITY_FILE should point to according private key file.
+ */
+public abstract class AbstractFailure implements Runnable {
+  protected static Shell rootShell = new Shell("/bin/bash", "root")
+
+  /**
+   * Guards every use of the shared {@link #rootShell}: the shell exposes the result of the
+   * last command as state on the instance (exec, then getRet), so commands issued by
+   * failures running in different threads must not interleave.
+   */
+  private static final Object SHELL_LOCK = new Object()
+
+  /**
+   * Used to wrap actual command to be executed over SSH, if running in distributed setup.
+   * First substitution param is path to SSH private key, second - remote server username,
+   * third - remote server host address, fourth - actual command being wrapped.
+   */
+  protected static String SSH_COMMAND_WRAPPER = "ssh -i %s -o StrictHostKeyChecking=no %s@%s '%s'"
+
+  /**
+   * List of hosts to run fail/restore commands against.
+   */
+  protected List<String> hosts = []
+
+  /**
+   * List of failing commands, defined by a subclass, executed in given sequence.
+   */
+  protected List<String> failCommands = []
+
+  /**
+   * List of restore commands, defined by a subclass, executed in given sequence.
+   */
+  protected List<String> restoreCommands = []
+
+  /**
+   * How long (in milliseconds) shall we wait before executing first failure.
+   */
+  protected long startDelay = 0
+
+  /**
+   * How long (in milliseconds) the failure thread sleeps between checks whether it has been
+   * interrupted, i.e. whether the failure is over and restore commands should be called.
+   */
+  private static final long SLEEP_TIME = 100
+
+  /**
+   * Simple constructor for failures, uses default values.
+   *
+   * @param hosts list of hosts this failure will be executed on.
+   */
+  public AbstractFailure(List<String> hosts) {
+    this.hosts = hosts
+  }
+
+  /**
+   * Constructor allowing to set all params.
+   *
+   * @param hosts list of hosts the failure will be running against
+   * @param startDelay how long (in millisecs) failure will wait before starting
+   */
+  public AbstractFailure(List<String> hosts, long startDelay) {
+    this.hosts = hosts
+    this.startDelay = startDelay
+  }
+
+  /**
+   * Runs failure/restore commands; meant to be executed in a separate thread.
+   *
+   * Sequence: wait for startDelay, run the fail commands, idle until the thread is
+   * interrupted, then run the restore commands. Restore commands are executed only if the
+   * fail commands were started: when the thread is interrupted during the start delay
+   * nothing has been broken yet, so there is nothing to restore.
+   */
+  @Override
+  public void run() {
+    boolean failureStarted = false
+    try {
+      if (startDelay > 0) {
+        try {
+          Thread.sleep(startDelay)
+        } catch (InterruptedException e) {
+          Thread.currentThread().interrupt()
+          return
+        }
+      }
+
+      // Set before the first command so that a partially applied failure is still restored.
+      failureStarted = true
+      runFailCommands()
+
+      while (!Thread.currentThread().isInterrupted()) {
+        try {
+          Thread.sleep(SLEEP_TIME)
+        } catch (InterruptedException e) {
+          return
+        }
+      }
+    } finally {
+      if (failureStarted) {
+        runRestoreCommands()
+      }
+    }
+  }
+
+  /**
+   * Runs every restore command, in order, and only afterwards fails if any of them returned
+   * a non-zero exit code, so one failing command does not prevent the remaining ones from
+   * being attempted.
+   */
+  private void runRestoreCommands() {
+    List<String> failedCommands = []
+    for (String command in restoreCommands) {
+      if (execAsRoot(command) != 0) {
+        failedCommands << command
+      }
+    }
+    assert failedCommands.isEmpty(), "Restore commands $failedCommands have returned non-0 error code:"
+  }
+
+  /**
+   * Runs every fail command, in order. Exit codes are deliberately ignored:
+   * some commands, like pkill over ssh, return 137, and that is fine.
+   */
+  private void runFailCommands() {
+    for (String command in failCommands) {
+      execAsRoot(command)
+    }
+  }
+
+  /**
+   * Executes a single command in the shared root shell and logs its errors.
+   *
+   * @param command command line to execute
+   * @return exit code reported by the shell for this command
+   */
+  private static def execAsRoot(String command) {
+    synchronized (SHELL_LOCK) {
+      rootShell.exec(command)
+      logError(rootShell)
+      return rootShell.getRet()
+    }
+  }
+
+  /**
+   * Reads the full path to private key file from env. variable PRIVATE_KEY_PATH_ENV_VAR.
+   *
+   * @return full path to file with private key for SSH connections to cluster.
+   */
+  protected String getIdentityFile() {
+    String identityFile = System.getenv(PRIVATE_KEY_PATH_ENV_VAR)
+    assert identityFile, "Env variable $PRIVATE_KEY_PATH_ENV_VAR is not set:"
+    return identityFile
+  }
+
+  /**
+   * Reads the username used for ssh commands from env. variable BIGTOP_SMOKES_USER.
+   *
+   * @return user which will be used to run SSH command on target hosts
+   */
+  protected String getSshUser() {
+    String sshUser = System.getenv(BIGTOP_SMOKES_USER)
+    assert sshUser, "Env variable $BIGTOP_SMOKES_USER is not set:"
+    return sshUser
+  }
+
+  /**
+   * If tests are running in distributed mode, i.e. not itest framework tests,
+   * but real cluster smoke tests, wrapping failure command to go over SSH to node on the cluster.
+   *
+   * @param formattedCommand actual failure command to be executed on the remote node
+   * @param host remote node to run command on
+   * @return full command to be executed in the local shell
+   */
+  protected String getSshWrappedCommand(String formattedCommand, String host) {
+    def identityFile = getIdentityFile()
+    def sshUser = getSshUser()
+
+    return String.format(SSH_COMMAND_WRAPPER, identityFile, sshUser, host, formattedCommand)
+  }
+}
diff --git a/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/FailureConstants.groovy b/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/FailureConstants.groovy
new file mode 100644
index 00000000..0c246815
--- /dev/null
+++ b/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/FailureConstants.groovy
@@ -0,0 +1,38 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bigtop.itest.failures
+
+/**
+ * Constants for cluster failure smoke tests.
+ */
+public final class FailureConstants {
+
+  /**
+   * Env variable which should contain full local path to the file with SSH private key
+   * used to remotely login on cluster nodes without password.
+   */
+  public static final String PRIVATE_KEY_PATH_ENV_VAR = "BIGTOP_SMOKES_CLUSTER_IDENTITY_FILE"
+
+  /**
+   * Env variable which should contain name of Linux user on the hosts where failures are running,
+   * this user should have password-less SSH enabled and privileges to run password-less sudo
+   * commands: service stop/start, pkill -9, iptables rules editing.
+   */
+  public static final String BIGTOP_SMOKES_USER = "BIGTOP_SMOKES_USER"
+
+  /**
+   * Constants holder, not meant to be instantiated.
+   */
+  private FailureConstants() {
+  }
+}
diff --git a/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/NetworkShutdownFailure.groovy b/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/NetworkShutdownFailure.groovy
new file mode 100644
index 00000000..15bf7970
--- /dev/null
+++ b/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/NetworkShutdownFailure.groovy
@@ -0,0 +1,78 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bigtop.itest.failures
+
+/**
+ * Can shutdown network connections between specified hosts during tests execution.
+ */
+public class NetworkShutdownFailure extends AbstractFailure {
+
+  private static final String DROP_INPUT_CONNECTIONS = "sudo iptables -A INPUT -s %s -j DROP"
+  private static final String DROP_OUTPUT_CONNECTIONS = "sudo iptables -A OUTPUT -d %s -j DROP"
+  private static final String RESTORE_INPUT_CONNECTIONS = "sudo iptables -D INPUT -s %s -j DROP"
+  private static final String RESTORE_OUTPUT_CONNECTIONS = "sudo iptables -D OUTPUT -d %s -j DROP"
+
+  /**
+   * Creates list of network disruptions between specified hosts.
+   *
+   * @param srcHost host whose connections will be cut
+   * @param dstHosts destination hosts; connections between srcHost and each of them are shut down.
+   */
+  public NetworkShutdownFailure(String srcHost, List<String> dstHosts) {
+    super(new ArrayList<String>())
+    populateCommandsList(srcHost, dstHosts)
+  }
+
+  /**
+   * Creates list of network disruptions between specified hosts,
+   * allows to set all additional params.
+   *
+   * @param srcHost host whose connections will be cut
+   * @param dstHosts destination hosts; connections between srcHost and each of them are shut down
+   * @param startDelay time (in milliseconds) the failure will wait before start
+   */
+  public NetworkShutdownFailure(String srcHost, List<String> dstHosts, long startDelay) {
+    super(new ArrayList<String>(), startDelay)
+    populateCommandsList(srcHost, dstHosts)
+  }
+
+  /*
+   * Fills the fail/restore command lists: for every destination host one INPUT and one
+   * OUTPUT drop rule is added on failure and deleted again on restore. Commands are used
+   * as they are when the source host is localhost and are wrapped into SSH otherwise.
+   */
+  private void populateCommandsList(String host, List<String> dstHosts) {
+    boolean runLocally = "localhost".equalsIgnoreCase(host)
+
+    for (target in dstHosts) {
+      failCommands.add(localOrRemote(String.format(DROP_INPUT_CONNECTIONS, target), host, runLocally))
+      failCommands.add(localOrRemote(String.format(DROP_OUTPUT_CONNECTIONS, target), host, runLocally))
+      restoreCommands.add(localOrRemote(String.format(RESTORE_INPUT_CONNECTIONS, target), host, runLocally))
+      restoreCommands.add(localOrRemote(String.format(RESTORE_OUTPUT_CONNECTIONS, target), host, runLocally))
+    }
+  }
+
+  /*
+   * Returns the command untouched for local execution, SSH-wrapped for the given host otherwise.
+   */
+  private String localOrRemote(String command, String host, boolean runLocally) {
+    return runLocally ? command : getSshWrappedCommand(command, host)
+  }
+}
diff --git a/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/ServiceKilledFailure.groovy b/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/ServiceKilledFailure.groovy
new file mode 100644
index 00000000..413f1713
--- /dev/null
+++ b/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/ServiceKilledFailure.groovy
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bigtop.itest.failures
+
+/**
+ * Can kill (with kill -9) specified service on specified hosts during tests run.
+ */
+public class ServiceKilledFailure extends AbstractFailure {
+
+  private static final String KILL_SERVICE_TEMPLATE = "sudo pkill -9 -f %s"
+  private static final String START_SERVICE_TEMPLATE = "sudo service %s start"
+
+  /**
+   * Creates a failure which kills the given service on the given hosts, without start delay.
+   *
+   * @param hosts list of hosts on which specified service will be killed
+   * @param serviceName name of service to be killed.
+   */
+  public ServiceKilledFailure(List<String> hosts, String serviceName) {
+    super(hosts)
+    populateCommandsList(hosts, serviceName)
+  }
+
+  /**
+   * Creates a failure which kills the given service on the given hosts after a start delay.
+   *
+   * @param hosts list of hosts on which specified service will be killed
+   * @param serviceName name of service to be killed
+   * @param startDelay time (in milliseconds) the failure will wait before start
+   */
+  public ServiceKilledFailure(List<String> hosts, String serviceName, long startDelay) {
+    super(hosts, startDelay)
+    populateCommandsList(hosts, serviceName)
+  }
+
+  /*
+   * Fills the fail (pkill) and restore (service start) command lists. A host list consisting
+   * of just "localhost" gets plain local commands; in any other case every host gets its
+   * own SSH-wrapped pair of commands.
+   */
+  private void populateCommandsList(List<String> hosts, String serviceName) {
+    String killCommand = String.format(KILL_SERVICE_TEMPLATE, serviceName)
+    String startCommand = String.format(START_SERVICE_TEMPLATE, serviceName)
+
+    boolean localOnly = hosts.size() == 1 && "localhost".equalsIgnoreCase(hosts[0])
+    if (localOnly) {
+      failCommands.add(killCommand)
+      restoreCommands.add(startCommand)
+      return
+    }
+
+    for (host in hosts) {
+      failCommands.add(getSshWrappedCommand(killCommand, host))
+      restoreCommands.add(getSshWrappedCommand(startCommand, host))
+    }
+  }
+}
diff --git a/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/ServiceRestartFailure.groovy b/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/ServiceRestartFailure.groovy
new file mode 100644
index 00000000..6dd1005c
--- /dev/null
+++ b/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/ServiceRestartFailure.groovy
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bigtop.itest.failures
+
+/**
+ * Can restart specified services on specified hosts during tests execution.
+ */
+public class ServiceRestartFailure extends AbstractFailure {
+
+  private static final String STOP_SERVICE_TEMPLATE = "sudo service %s stop"
+  private static final String START_SERVICE_TEMPLATE = "sudo service %s start"
+
+  /**
+   * Creates a failure which stops the given service on the given hosts and starts it again
+   * on restore, without start delay.
+   *
+   * @param hosts list of hosts on which specified service will be restarted
+   * @param serviceName name of service to be restarted.
+   */
+  public ServiceRestartFailure(List<String> hosts, String serviceName) {
+    super(hosts)
+    populateCommandsList(hosts, serviceName)
+  }
+
+  /**
+   * Creates a failure which stops the given service on the given hosts and starts it again
+   * on restore, waiting for the start delay first.
+   *
+   * @param hosts list of hosts on which specified service will be restarted
+   * @param serviceName name of service to be restarted
+   * @param startDelay time (in milliseconds) the failure will wait before start
+   */
+  public ServiceRestartFailure(List<String> hosts, String serviceName, long startDelay) {
+    super(hosts, startDelay)
+    populateCommandsList(hosts, serviceName)
+  }
+
+  /*
+   * Fills the fail (service stop) and restore (service start) command lists. Unless the host
+   * list is exactly one "localhost" entry, each host gets its own SSH-wrapped pair of
+   * commands; otherwise the plain commands are used for local execution.
+   */
+  private void populateCommandsList(List<String> hosts, String serviceName) {
+    final String stopService = String.format(STOP_SERVICE_TEMPLATE, serviceName)
+    final String startService = String.format(START_SERVICE_TEMPLATE, serviceName)
+
+    if (hosts.size() != 1 || !"localhost".equalsIgnoreCase(hosts[0])) {
+      hosts.each { remoteHost ->
+        failCommands << getSshWrappedCommand(stopService, remoteHost)
+        restoreCommands << getSshWrappedCommand(startService, remoteHost)
+      }
+    } else {
+      failCommands << stopService
+      restoreCommands << startService
+    }
+  }
+}