diff options
author | Dawson Choong <dawson.choong@wandisco.com> | 2014-10-08 22:20:56 -0400 |
---|---|---|
committer | jay@apache.org <jayunit100> | 2014-10-08 22:24:10 -0400 |
commit | 2a766aa27ab2db9177f708eddcbcfdd59530aa2f (patch) | |
tree | 93bee3e23a72c891bacc589e9dfcfeb1bd354a78 /bigtop-test-framework | |
parent | 04802cc5960c06108219e1ec886c895367798f7b (diff) |
BIGTOP 1388. Use cluster failure tests during other tests with command line parameterization.
Signed-off-by: jay@apache.org <jayunit100>
Diffstat (limited to 'bigtop-test-framework')
5 files changed, 235 insertions, 30 deletions
diff --git a/bigtop-test-framework/README b/bigtop-test-framework/README index 0980a6a8..79bfdf32 100644 --- a/bigtop-test-framework/README +++ b/bigtop-test-framework/README @@ -65,3 +65,20 @@ on the machine with Bigtop workspace: - run following sample commands from your local machine to verify your setup: * ssh -i /test_bigtop_ssh_key bigtop@<some cluster node> sudo service crond stop * ssh -i /test_bigtop_ssh_key bigtop@<some cluster node> sudo service crond start + +The user can now run a series of cluster failures while running hadoop and mapreduce tests. This can be achieved by +modifying the vars.properties file found in src/main/resources. To enable the properties file, use -DuseProperties=true +when running tests (ex: mvn verify -f bigtop-tests/test-execution/longevity/pom.xml -DuseProperties=true +-Dorg.apache.maven-failsafe-plugin.testInclude=**/TestDFSIO*/). + +For vars.properties: +-Use testhost=name to specify host being tested on. +-Use testremotehost=name to specify remote host name being tested on. +-Use runall=true to run all cluster failures. +-Use servicerestart=true to perform a cron or crond service restart. +-Use servicekill=true to perform a service kill. +-Use networkshutdown=true to perform network shutdown and restart. +-Use service=name to specify which service being used for restart/kill. (default is crond). +-Use failuredelay=time to specify time (in seconds) in between each failure function. +-Use startdelay=time to specify time (in seconds) before first failure. +-Use killduration=time to specify the duration (in seconds) a service is down for.
\ No newline at end of file diff --git a/bigtop-test-framework/pom.xml b/bigtop-test-framework/pom.xml index 71828d2f..4d13b43e 100644 --- a/bigtop-test-framework/pom.xml +++ b/bigtop-test-framework/pom.xml @@ -61,6 +61,11 @@ <build> <plugins> <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-surefire-plugin</artifactId> + <version>2.7</version> + </plugin> + <plugin> <groupId>org.codehaus.groovy</groupId> <artifactId>groovy-eclipse-compiler</artifactId> <version>${groovy-eclipse-compiler.version}</version> diff --git a/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/AbstractFailure.groovy b/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/AbstractFailure.groovy index d4f117d0..f88358fc 100644 --- a/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/AbstractFailure.groovy +++ b/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/AbstractFailure.groovy @@ -69,7 +69,7 @@ public abstract class AbstractFailure implements Runnable { /** * How long (in milliseconds) shall we wait before executing first failure. */ - protected long startDelay = 0 + protected long failureDelay = 0 /** * How long failure thread waits before next check if failure is over and it should call restore commands. @@ -88,11 +88,11 @@ public abstract class AbstractFailure implements Runnable { * Constructor allowing to set all params. 
* * @param hosts list of hosts the failure will be running against - * @param startDelay how long (in millisecs) failure will wait before starting + * @param failureDelay how long (in millisecs) failure will wait before starting */ - public AbstractFailure(List<String> hosts, long startDelay) { + public AbstractFailure(List<String> hosts, long failureDelay) { this.hosts = hosts - this.startDelay = startDelay + this.failureDelay = failureDelay } /** @@ -101,22 +101,36 @@ public abstract class AbstractFailure implements Runnable { @Override public void run() { try { - if (startDelay > 0) { + if(failureDelay > 0) { try { - Thread.sleep(startDelay) + Thread.sleep(failureDelay) } catch (InterruptedException e) { - Thread.currentThread().interrupt() return } } - - runFailCommands() - - while (!Thread.currentThread().isInterrupted()) { - try { - Thread.sleep(SLEEP_TIME) - } catch (InterruptedException e) { - return + if(FailureVars.instance.getServiceRestart().equals("true") + || FailureVars.instance.getServiceKill().equals("true") + || FailureVars.instance.getNetworkShutdown().equals("true")) { + runFailCommands() + Thread.sleep(FailureVars.instance.getKillDuration()) + } + else { + if (failureDelay > 0) { + try { + Thread.sleep(failureDelay) + } catch (InterruptedException e) { + Thread.currentThread().interrupt() + return + } + } + runFailCommands() + + while (!Thread.currentThread().isInterrupted()) { + try { + Thread.sleep(SLEEP_TIME) + } catch (InterruptedException e) { + return + } } } } finally { diff --git a/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/FailureVars.groovy b/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/FailureVars.groovy new file mode 100644 index 00000000..6bc1ca3b --- /dev/null +++ b/bigtop-test-framework/src/main/groovy/org/apache/bigtop/itest/failures/FailureVars.groovy @@ -0,0 +1,155 @@ +package org.apache.bigtop.itest.failures + +import org.apache.bigtop.itest.shell.OS +import 
org.apache.bigtop.itest.shell.Shell +import java.io.BufferedReader +import java.io.IOException; + +/** + * This class manages objects, variables, and command line parameter values for cluster failure testing. + * By default, all parameters are off or set to false. + */ +public class FailureVars { + + private final String CRON_SERVICE; + private final int SLEEP_TIME = 100; + private static FailureVars instance = null; + private String propertyFile = "/vars.properties" + private Boolean useProperties = System.getProperty("useProperties", Boolean.FALSE.toString()); + private String testHost; + private String testRemoteHost; + private String runFailures; + private String serviceRestart; + private String serviceKill; + private String networkShutdown; + private String service; + private String failureDelay; + private String startDelay; + private String killDuration; + + { + switch (OS.linux_flavor) { + case ~/(?is).*(redhat|centos|rhel|fedora|enterpriseenterpriseserver).*/: + CRON_SERVICE = "crond" + break; + default: + CRON_SERVICE = "cron" + } + } + + protected FailureVars() { + if(useProperties) { + loadProps(); + } + } + + public static FailureVars getInstance() { + if(instance == null) { + instance = new FailureVars(); + } + return instance; + } + + private void loadProps() { + try { + BufferedReader is = new BufferedReader (new InputStreamReader(getClass().getResourceAsStream(propertyFile))); + System.out.println("Input Stream Location: " + is); + Properties props = new Properties(); + props.load(is); + is.close(); + testHost = props.getProperty("testhost", "localhost"); + testRemoteHost = props.getProperty("testremotehost", "apache.org"); + runFailures = props.getProperty("runall", Boolean.FALSE.toString()); + serviceRestart = props.getProperty("servicerestart", Boolean.FALSE.toString()); + serviceKill = props.getProperty("servicekill", Boolean.FALSE.toString()); + networkShutdown = props.getProperty("networkshutdown", Boolean.FALSE.toString()); + service = 
props.getProperty("service", "crond"); + failureDelay = props.getProperty("failuredelay", Integer.toString(0)); + startDelay = props.getProperty("startdelay", Integer.toString(0)); + killDuration = props.getProperty("killduration", Integer.toString(0)); + } + catch(IOException ioe) { + System.out.println(ioe); + } + } + + public void setKillDuration(String killDuration) { + this.killDuration = Long.toString(killDuration); + } + + public void setTestHost(String testHost) { + this.testHost = testHost; + } + + public void setTestRemoteHost(String testRemoteHost) { + this.testRemoteHost = testRemoteHost; + } + + public void setRunAll(String runFailures) { + this.runFailures = runFailures; + } + + public void setRestart(String serviceRestart) { + this.serviceRestart = serviceRestart; + } + + public void setKill(String serviceKill) { + this.serviceKill = serviceKill; + } + + public void setShutdown(String networkShutdown) { + this.networkShutdown = networkShutdown; + } + + public void setFailureDelay(long failureDelay) { + this.failureDelay = Long.toString(failureDelay); + } + + public void setService(String service) { + this.service = service; + } + + public long getKillDuration() { + return Long.parseLong(killDuration)*1000; + } + public String getTestHost() { + return testHost; + } + + public String getService() { + return service; + } + + public String getTestRemoteHost() { + return testRemoteHost; + } + + public long getStartDelay() { + return Long.parseLong(startDelay)*1000; + } + + + public String getRunFailures() { + return runFailures; + } + + public String getServiceRestart() { + return serviceRestart; + } + + public String getServiceKill() { + return serviceKill; + } + + public String getNetworkShutdown() { + return networkShutdown; + } + + public long getFailureDelay() { + return Long.parseLong(failureDelay)*1000; + } + + int getSleepTime() { + return SLEEP_TIME; + } +} diff --git 
a/bigtop-test-framework/src/test/groovy/org/apache/bigtop/itest/failures/ClusterFailuresTest.groovy b/bigtop-test-framework/src/test/groovy/org/apache/bigtop/itest/failures/ClusterFailuresTest.groovy index af6b36b7..1f099bea 100644 --- a/bigtop-test-framework/src/test/groovy/org/apache/bigtop/itest/failures/ClusterFailuresTest.groovy +++ b/bigtop-test-framework/src/test/groovy/org/apache/bigtop/itest/failures/ClusterFailuresTest.groovy @@ -19,15 +19,19 @@ package org.apache.bigtop.itest.failures import org.apache.bigtop.itest.shell.OS +import org.junit.Before import org.junit.Test import org.apache.bigtop.itest.shell.Shell public class ClusterFailuresTest { private Shell rootShell = new Shell("/bin/bash", "root") private final int SLEEP_TIME = 100 - private final String TEST_HOST = "localhost" - private final String TEST_REMOTE_HOST = "apache.org" private final String CRON_SERVICE + private String testHost; + private String testRemoteHost; + private String serviceRestart; + private String serviceKill; + private String networkShutdown; { switch (OS.linux_flavor) { @@ -39,12 +43,22 @@ public class ClusterFailuresTest { } } + @Before + void configureVars() { + def f = new FailureVars(); + testHost = f.getTestHost(); + testRemoteHost = f.getTestRemoteHost(); + serviceRestart = f.getServiceRestart(); + serviceKill = f.getServiceKill(); + networkShutdown = f.getNetworkShutdown(); + } + @Test void testServiceRestart() { startCron() assert isCronRunning(), "$CRON_SERVICE service isn't running before the test:" - def cronKilled = new ServiceRestartFailure([TEST_HOST], "$CRON_SERVICE") + def cronKilled = new ServiceRestartFailure([testHost], "$CRON_SERVICE") Thread t = new Thread(cronKilled) t.start() @@ -81,7 +95,7 @@ public class ClusterFailuresTest { startCron() assert isCronRunning(), "$CRON_SERVICE service isn't running before the test:" - def cronKilled = new ServiceKilledFailure([TEST_HOST], "$CRON_SERVICE") + def cronKilled = new ServiceKilledFailure([testHost], 
"$CRON_SERVICE") Thread t = new Thread(cronKilled) t.start() @@ -109,34 +123,34 @@ public class ClusterFailuresTest { @Test void testNetworkShutdown() { //make sure there are no blocking rules - rootShell.exec("iptables -D INPUT -s $TEST_REMOTE_HOST -j DROP") - rootShell.exec("iptables -D OUTPUT -d $TEST_REMOTE_HOST -j DROP") + rootShell.exec("iptables -D INPUT -s $testRemoteHost -j DROP") + rootShell.exec("iptables -D OUTPUT -d $testRemoteHost -j DROP") - assert isRemoteHostReachable(), "No ping to $TEST_REMOTE_HOST, which is used for network failures test:" + assert isRemoteHostReachable(), "No ping to $testRemoteHost, which is used for network failures test:" - def networkShutdown = new NetworkShutdownFailure(TEST_HOST, [TEST_REMOTE_HOST]) + def networkShutdown = new NetworkShutdownFailure(testHost, [testRemoteHost]) Thread t = new Thread(networkShutdown) t.start() while (isRemoteHostReachable()) { - println "$TEST_REMOTE_HOST is still reachable..." + println "$testRemoteHost is still reachable..." Thread.sleep(SLEEP_TIME) } try{ - assert !isRemoteHostReachable(), "Connection to $TEST_REMOTE_HOST hasn't been killed as expected:" - println "$TEST_REMOTE_HOST isn't reachable. Good." + assert !isRemoteHostReachable(), "Connection to $testRemoteHost hasn't been killed as expected:" + println "$testRemoteHost isn't reachable. Good." } finally { t.interrupt() } while (!isRemoteHostReachable()) { - println "$TEST_REMOTE_HOST isn't reachable..." + println "$testRemoteHost isn't reachable..." Thread.sleep(SLEEP_TIME) } - assert isRemoteHostReachable(), "Connection to $TEST_REMOTE_HOST hasn't been restored after the test:" - println "$TEST_REMOTE_HOST is reachable again. Good." + assert isRemoteHostReachable(), "Connection to $testRemoteHost hasn't been restored after the test:" + println "$testRemoteHost is reachable again. Good." 
} private boolean isCronRunning() { @@ -148,6 +162,6 @@ public class ClusterFailuresTest { } private boolean isRemoteHostReachable() { - return rootShell.exec("ping -qc 1 $TEST_REMOTE_HOST").ret == 0 ? true : false + return rootShell.exec("ping -qc 1 $testRemoteHost").ret == 0 ? true : false } } |