diff options
author | Henry Wang <henry.wang@wandisco.com> | 2013-09-10 16:41:43 -0700 |
---|---|---|
committer | Konstantin Boudnik <cos@apache.org> | 2013-09-10 16:41:43 -0700 |
commit | 95bac804ea5f98c5d448e9a404604d825fb50af8 (patch) | |
tree | de7b7071de513a778445548bb673235979a67cdc | |
parent | 46b28b00d2dccdfe491c98a5d3cb102e72733adc (diff) |
BIGTOP-1030. Develop integration tests for new Spark component
Signed-off-by: Konstantin Boudnik <cos@apache.org>
7 files changed, 405 insertions, 0 deletions
diff --git a/bigtop-tests/test-artifacts/pom.xml b/bigtop-tests/test-artifacts/pom.xml index 9f46627c..bcbdf590 100644 --- a/bigtop-tests/test-artifacts/pom.xml +++ b/bigtop-tests/test-artifacts/pom.xml @@ -48,6 +48,7 @@ <module>datafu</module> <module>fatjar</module> <module>hcatalog</module> + <module>spark</module> </modules> <dependencies> diff --git a/bigtop-tests/test-artifacts/spark/pom.xml b/bigtop-tests/test-artifacts/spark/pom.xml new file mode 100644 index 00000000..a29f87fa --- /dev/null +++ b/bigtop-tests/test-artifacts/spark/pom.xml @@ -0,0 +1,60 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <parent> + <groupId>org.apache.bigtop.itest</groupId> + <artifactId>bigtop-smokes</artifactId> + <version>0.7.0-SNAPSHOT</version> + <relativePath>../pom.xml</relativePath> + </parent> + + <groupId>org.apache.bigtop.itest</groupId> + <artifactId>spark-smoke</artifactId> + <version>0.7.0-SNAPSHOT</version> + <name>sparksmoke</name> + + <repositories> + <repository> + <id>akka-repo</id> + <name>Akka Repository</name> + <url>http://repo.akka.io/releases/</url> + <releases> + <enabled>true</enabled> + </releases> + <snapshots> + <enabled>false</enabled> + </snapshots> + </repository> + <repository> + <id>spray-repo</id> + <name>Spray Repository</name> + <url>http://repo.spray.cc/</url> + <releases> + <enabled>true</enabled> + </releases> + <snapshots> + <enabled>false</enabled> + </snapshots> + </repository> + </repositories> + + <dependencies> + <dependency> + <groupId>org.spark-project</groupId> + <artifactId>spark-core</artifactId> + <version>0.8.0-SNAPSHOT</version> + <classifier>hadoop2-yarn</classifier> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-hdfs</artifactId> + 
</dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-common</artifactId> + </dependency> + </dependencies> +</project> diff --git a/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkSmoke.groovy b/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkSmoke.groovy new file mode 100644 index 00000000..414ec9c4 --- /dev/null +++ b/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkSmoke.groovy @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.bigtop.itest.spark + +import org.apache.bigtop.itest.shell.Shell +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.FileSystem +import org.apache.hadoop.fs.Path + +import org.junit.Test +import org.junit.BeforeClass +import static org.junit.Assert.assertEquals + +import static org.apache.bigtop.itest.LogErrorsUtils.logError + +import spark.api.java.* +import spark.api.java.function.Function + +public class TestSparkSmoke implements Serializable { + + private static String SPARK_HOME = System.getenv("SPARK_HOME") + private static String SPARK_MASTER = System.getenv("SPARK_MASTER") + private static String USER = System.getProperty("user.name") + private static String pwd = "" + private static Configuration conf + static Shell sh = new Shell("/bin/bash -s") + def result = ["0.2: 3", "0.1: 3", "0.0: 3", "9.0: 3", "9.2: 3", "9.1: 3"] + + @BeforeClass + static void setUp() { + sh.exec("pwd") + pwd = sh.out + int lastIndex = pwd.length() - 1 + pwd = pwd.substring(1, lastIndex) + } + + @Test + void ShellTest() { + String kmeans = "file://" + pwd + "/kmeans_data.txt" + sh.exec("cd ${SPARK_HOME} && ./spark-class org.apache.spark.examples.JavaWordCount local " + kmeans) + logError(sh) + assertEquals(result, sh.out) + } + + @Test + public void HDFSTest() { + conf = new Configuration() + String fs_default_name = conf.get("fs.defaultFS") + FileSystem fs = FileSystem.get(conf) + String pathname = "/user/${USER}/kmeans_data.txt" + fs.copyFromLocalFile(new Path("kmeans_data.txt"), new Path(pathname)) + fs.close() + + String dfsname = fs_default_name + pathname + sh.exec("cd ${SPARK_HOME} && ./spark-class org.apache.spark.examples.JavaWordCount ${SPARK_MASTER} " + dfsname) + logError(sh) + assertEquals(result, sh.out) + } + + @Test + public void JobTest() { + String logFile = "file://" + pwd + "/README.md"; + String[] jars = [System.getProperty("sparkJar"), org.apache.bigtop.itest.JarContent.getJarURL("groovy.lang.GroovyObject")]; + 
+ + JavaSparkContext sc = new JavaSparkContext("local", "Simple Job", + SPARK_HOME, jars); + + JavaRDD<String> logData = sc.textFile(logFile).cache(); + + long num_Spark = logData.filter(new Function<String, Boolean>() { + public Boolean call(String s) { return s.contains("Spark"); } + }).count(); + + long num_e = logData.filter(new Function<String, Boolean>() { + public Boolean call(String s) { return s.contains("e"); } + }).count(); + + assertEquals("Lines containing 'spark' should be 14", num_Spark, 14); + assertEquals("Lines containing 'e' should be 43", num_e, 43); + } + +} diff --git a/bigtop-tests/test-artifacts/spark/src/main/resources/README.md b/bigtop-tests/test-artifacts/spark/src/main/resources/README.md new file mode 100644 index 00000000..ba24ab43 --- /dev/null +++ b/bigtop-tests/test-artifacts/spark/src/main/resources/README.md @@ -0,0 +1,73 @@ +# Spark + +Lightning-Fast Cluster Computing - <http://www.spark-project.org/> + + +## Online Documentation + +You can find the latest Spark documentation, including a programming +guide, on the project webpage at <http://spark-project.org/documentation.html>. +This README file only contains basic setup instructions. + + +## Building + +Spark requires Scala 2.9.2 (Scala 2.10 is not yet supported). The project is +built using Simple Build Tool (SBT), which is packaged with it. To build +Spark and its example programs, run: + + sbt/sbt package + +Spark also supports building using Maven. If you would like to build using Maven, +see the [instructions for building Spark with Maven](http://spark-project.org/docs/latest/building-with-maven.html) +in the Spark documentation. + +To run Spark, you will need to have Scala's bin directory in your `PATH`, or +you will need to set the `SCALA_HOME` environment variable to point to where +you've installed Scala. Scala must be accessible through one of these +methods on your cluster's worker nodes as well as its master. 
+ +To run one of the examples, use `./run <class> <params>`. For example: + + ./run spark.examples.SparkLR local[2] + +will run the Logistic Regression example locally on 2 CPUs. + +Each of the example programs prints usage help if no params are given. + +All of the Spark samples take a `<host>` parameter that is the cluster URL +to connect to. This can be a mesos:// or spark:// URL, or "local" to run +locally with one thread, or "local[N]" to run locally with N threads. + + +## A Note About Hadoop Versions + +Spark uses the Hadoop core library to talk to HDFS and other Hadoop-supported +storage systems. Because the HDFS API has changed in different versions of +Hadoop, you must build Spark against the same version that your cluster runs. +You can change the version by setting the `HADOOP_VERSION` variable at the top +of `project/SparkBuild.scala`, then rebuilding Spark. + + +## Configuration + +Please refer to the "Configuration" guide in the online documentation for a +full overview on how to configure Spark. At the minimum, you will need to +create a `conf/spark-env.sh` script (copy `conf/spark-env.sh.template`) and +set the following two variables: + +- `SCALA_HOME`: Location where Scala is installed. + +- `MESOS_NATIVE_LIBRARY`: Your Mesos library (only needed if you want to run + on Mesos). For example, this might be `/usr/local/lib/libmesos.so` on Linux. + + +## Contributing to Spark + +Contributions via GitHub pull requests are gladly accepted from their original +author. Along with any pull requests, please state that the contribution is +your original work and that you license the work to the project under the +project's open source license. Whether or not you state this explicitly, by +submitting any copyrighted material via pull request, email, or other means +you agree to license the material under the project's open source license and +warrant that you have the legal authority to do so. 
diff --git a/bigtop-tests/test-artifacts/spark/src/main/resources/kmeans_data.txt b/bigtop-tests/test-artifacts/spark/src/main/resources/kmeans_data.txt new file mode 100644 index 00000000..338664f7 --- /dev/null +++ b/bigtop-tests/test-artifacts/spark/src/main/resources/kmeans_data.txt @@ -0,0 +1,6 @@ +0.0 0.0 0.0 +0.1 0.1 0.1 +0.2 0.2 0.2 +9.0 9.0 9.0 +9.1 9.1 9.1 +9.2 9.2 9.2 diff --git a/bigtop-tests/test-execution/smokes/spark/pom.xml b/bigtop-tests/test-execution/smokes/spark/pom.xml new file mode 100644 index 00000000..e7a80d94 --- /dev/null +++ b/bigtop-tests/test-execution/smokes/spark/pom.xml @@ -0,0 +1,166 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <parent> + <groupId>org.apache.bigtop.itest</groupId> + <artifactId>smoke-tests</artifactId> + <version>0.7.0-SNAPSHOT</version> + <relativePath>../pom.xml</relativePath> + </parent> + + <groupId>org.apache.bigtop.itest</groupId> + <artifactId>spark-smoke-execution</artifactId> + <version>0.7.0-SNAPSHOT</version> + <name>Spark smoke test execution</name> + + <properties> + <SPARK_HOME>${env.SPARK_HOME}</SPARK_HOME> + <SPARK_MASTER>${env.SPARK_MASTER}</SPARK_MASTER> + <org.apache.maven-dependency-plugin.groupId>org.apache.bigtop.itest</org.apache.maven-dependency-plugin.groupId> + <org.apache.maven-dependency-plugin.artifactId>spark-smoke</org.apache.maven-dependency-plugin.artifactId> + <org.apache.maven-dependency-plugin.version>${spark-smoke.version}</org.apache.maven-dependency-plugin.version> + <org.apache.maven-dependency-plugin.output>${project.build.directory} + </org.apache.maven-dependency-plugin.output> + <org.apache.maven-dependency-plugin.pattern>**/*</org.apache.maven-dependency-plugin.pattern> + <org.apache.maven-dependency-plugin.type>jar</org.apache.maven-dependency-plugin.type> + </properties> + + <repositories> + <repository> + <id>akka-repo</id> + <name>Akka Repository</name> + <url>http://repo.akka.io/releases/</url> + <releases> + <enabled>true</enabled> + </releases> + <snapshots> + <enabled>false</enabled> + </snapshots> + </repository> + <repository> + <id>spray-repo</id> + <name>Spray Repository</name> + <url>http://repo.spray.cc/</url> + <releases> + <enabled>true</enabled> + </releases> + <snapshots> + <enabled>false</enabled> + </snapshots> + </repository> + </repositories> + + <dependencies> + <dependency> + <groupId>commons-logging</groupId> + <artifactId>commons-logging</artifactId> 
+ <version>1.1</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.apache.cxf</groupId> + <artifactId>cxf-rt-frontend-jaxrs</artifactId> + <version>2.5.0</version> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-common</artifactId> + <exclusions> + <exclusion> + <groupId>asm</groupId> + <artifactId>asm</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-hdfs</artifactId> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-mapreduce-client-jobclient</artifactId> + <version>${hadoop.version}</version> + </dependency> + <!-- explicit dependency is needed to pull down transient deps. such as hadoop-examples --> + <dependency> + <groupId>${org.apache.maven-dependency-plugin.groupId}</groupId> + <artifactId>${org.apache.maven-dependency-plugin.artifactId}</artifactId> + <version>${spark-smoke.version}</version> + </dependency> + </dependencies> + + <build> + <plugins> + + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-dependency-plugin</artifactId> + </plugin> + + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-failsafe-plugin</artifactId> + <version>2.11</version> + <configuration> + <forkMode>always</forkMode> + <systemPropertyVariables> + <sparkJar> + ${project.build.directory}/${org.apache.maven-dependency-plugin.artifactId}-${org.apache.maven-dependency-plugin.version}.${org.apache.maven-dependency-plugin.type} + </sparkJar> + </systemPropertyVariables> + </configuration> + </plugin> + + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-enforcer-plugin</artifactId> + <version>1.0</version> + <executions> + <execution> + <id>enforce-property</id> + <goals> + <goal>enforce</goal> + </goals> + <configuration> + <rules> + <requireProperty> + <property>SPARK_HOME</property> + <message>SPARK_HOME env. 
variable has to be set</message> + </requireProperty> + <requireProperty> + <property>SPARK_MASTER</property> + <message>SPARK_MASTER env. variable has to be set</message> + </requireProperty> + <requireProperty> + <property>HADOOP_CONF_DIR</property> + <message>HADOOP_CONF_DIR env. variable has to be set</message> + </requireProperty> + </rules> + <fail>true</fail> + </configuration> + </execution> + </executions> + </plugin> + + </plugins> + </build> +</project> @@ -44,6 +44,7 @@ <zookeeper.version>3.4.5</zookeeper.version> <giraph.version>0.2-SNAPSHOT</giraph.version> <solr.version>4.2.1</solr.version> + <spark-smoke.version>${project.version}</spark-smoke.version> <itest-common.version>${project.version}</itest-common.version> |