aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHenry Wang <henry.wang@wandisco.com>2013-09-10 16:41:43 -0700
committerKonstantin Boudnik <cos@apache.org>2013-09-10 16:41:43 -0700
commit95bac804ea5f98c5d448e9a404604d825fb50af8 (patch)
treede7b7071de513a778445548bb673235979a67cdc
parent46b28b00d2dccdfe491c98a5d3cb102e72733adc (diff)
BIGTOP-1030. Develop integration tests for new Spark component
Signed-off-by: Konstantin Boudnik <cos@apache.org>
-rw-r--r--bigtop-tests/test-artifacts/pom.xml1
-rw-r--r--bigtop-tests/test-artifacts/spark/pom.xml60
-rw-r--r--bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkSmoke.groovy98
-rw-r--r--bigtop-tests/test-artifacts/spark/src/main/resources/README.md73
-rw-r--r--bigtop-tests/test-artifacts/spark/src/main/resources/kmeans_data.txt6
-rw-r--r--bigtop-tests/test-execution/smokes/spark/pom.xml166
-rw-r--r--pom.xml1
7 files changed, 405 insertions, 0 deletions
diff --git a/bigtop-tests/test-artifacts/pom.xml b/bigtop-tests/test-artifacts/pom.xml
index 9f46627c..bcbdf590 100644
--- a/bigtop-tests/test-artifacts/pom.xml
+++ b/bigtop-tests/test-artifacts/pom.xml
@@ -48,6 +48,7 @@
<module>datafu</module>
<module>fatjar</module>
<module>hcatalog</module>
+ <module>spark</module>
</modules>
<dependencies>
diff --git a/bigtop-tests/test-artifacts/spark/pom.xml b/bigtop-tests/test-artifacts/spark/pom.xml
new file mode 100644
index 00000000..a29f87fa
--- /dev/null
+++ b/bigtop-tests/test-artifacts/spark/pom.xml
@@ -0,0 +1,60 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!-- Maven module for the spark-smoke test artifact. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.bigtop.itest</groupId>
+ <artifactId>bigtop-smokes</artifactId>
+ <version>0.7.0-SNAPSHOT</version>
+ <relativePath>../pom.xml</relativePath>
+ </parent>
+
+ <groupId>org.apache.bigtop.itest</groupId>
+ <artifactId>spark-smoke</artifactId>
+ <version>0.7.0-SNAPSHOT</version>
+ <name>sparksmoke</name>
+
+ <!-- Extra repositories; both serve releases only (snapshots are disabled). -->
+ <repositories>
+ <repository>
+ <id>akka-repo</id>
+ <name>Akka Repository</name>
+ <url>http://repo.akka.io/releases/</url>
+ <releases>
+ <enabled>true</enabled>
+ </releases>
+ <snapshots>
+ <enabled>false</enabled>
+ </snapshots>
+ </repository>
+ <repository>
+ <id>spray-repo</id>
+ <name>Spray Repository</name>
+ <url>http://repo.spray.cc/</url>
+ <releases>
+ <enabled>true</enabled>
+ </releases>
+ <snapshots>
+ <enabled>false</enabled>
+ </snapshots>
+ </repository>
+ </repositories>
+
+ <dependencies>
+ <!-- NOTE(review): this is a SNAPSHOT version, while both repositories declared
+ above have snapshots disabled; it presumably has to be resolved from the
+ local repository or from a repository declared in a parent POM. Confirm. -->
+ <dependency>
+ <groupId>org.spark-project</groupId>
+ <artifactId>spark-core</artifactId>
+ <version>0.8.0-SNAPSHOT</version>
+ <classifier>hadoop2-yarn</classifier>
+ </dependency>
+ <!-- No versions are given for the Hadoop artifacts below; presumably they are
+ managed by a parent POM. Confirm. -->
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ </dependency>
+ </dependencies>
+</project>
diff --git a/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkSmoke.groovy b/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkSmoke.groovy
new file mode 100644
index 00000000..414ec9c4
--- /dev/null
+++ b/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkSmoke.groovy
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bigtop.itest.spark
+
+import org.apache.bigtop.itest.shell.Shell
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.FileSystem
+import org.apache.hadoop.fs.Path
+
+import org.junit.Test
+import org.junit.BeforeClass
+import static org.junit.Assert.assertEquals
+
+import static org.apache.bigtop.itest.LogErrorsUtils.logError
+
+import spark.api.java.*
+import spark.api.java.function.Function
+
+/**
+ * Smoke tests for a Spark installation.
+ *
+ * Runs the JavaWordCount example through SPARK_HOME/spark-class against a
+ * local file and against a file copied into HDFS, and runs a small line
+ * counting job in-process through the Spark Java API.
+ *
+ * SPARK_HOME and SPARK_MASTER are read from the environment. kmeans_data.txt
+ * and README.md are looked up in the working directory of the test run.
+ *
+ * The class is Serializable because the anonymous Function instances created
+ * in JobTest hold a reference to the enclosing test instance.
+ */
+public class TestSparkSmoke implements Serializable {
+
+  private static String SPARK_HOME = System.getenv("SPARK_HOME")
+  private static String SPARK_MASTER = System.getenv("SPARK_MASTER")
+  private static String USER = System.getProperty("user.name")
+  private static String pwd = ""
+  private static Configuration conf
+  static Shell sh = new Shell("/bin/bash -s")
+  // Expected JavaWordCount output lines for kmeans_data.txt (each token occurs 3 times).
+  def result = ["0.2: 3", "0.1: 3", "0.0: 3", "9.0: 3", "9.2: 3", "9.1: 3"]
+
+  /**
+   * Records the working directory of the test run; the tests use it to build
+   * file:// URLs for the local input files.
+   */
+  @BeforeClass
+  static void setUp() {
+    sh.exec("pwd")
+    // sh.out is compared against a list of lines elsewhere in this class, so
+    // take its first line directly instead of stringifying the whole list and
+    // trimming the surrounding '[' and ']' characters.
+    pwd = sh.out[0]
+  }
+
+  /** Returns a sorted copy of the given lines, leaving the argument untouched. */
+  private static List<String> sorted(List<String> lines) {
+    List<String> copy = new ArrayList<String>(lines)
+    Collections.sort(copy)
+    return copy
+  }
+
+  /** Runs JavaWordCount with the "local" master against the local kmeans_data.txt. */
+  @Test
+  void ShellTest() {
+    String kmeans = "file://" + pwd + "/kmeans_data.txt"
+    sh.exec("cd ${SPARK_HOME} && ./spark-class org.apache.spark.examples.JavaWordCount local " + kmeans)
+    logError(sh)
+    // Compare as sorted lists: the order in which the counts are printed is
+    // not something this test should depend on.
+    assertEquals(sorted(result), sorted(sh.out))
+  }
+
+  /**
+   * Copies kmeans_data.txt into the current user's HDFS home directory and
+   * runs JavaWordCount against it on the master given by SPARK_MASTER.
+   */
+  @Test
+  public void HDFSTest() {
+    conf = new Configuration()
+    String fs_default_name = conf.get("fs.defaultFS")
+    FileSystem fs = FileSystem.get(conf)
+    String pathname = "/user/${USER}/kmeans_data.txt"
+    try {
+      fs.copyFromLocalFile(new Path("kmeans_data.txt"), new Path(pathname))
+    } finally {
+      // Release the handle even when the copy fails.
+      fs.close()
+    }
+
+    String dfsname = fs_default_name + pathname
+    sh.exec("cd ${SPARK_HOME} && ./spark-class org.apache.spark.examples.JavaWordCount ${SPARK_MASTER} " + dfsname)
+    logError(sh)
+    assertEquals(sorted(result), sorted(sh.out))
+  }
+
+  /**
+   * Runs an in-process job on a "local" JavaSparkContext that counts the lines
+   * of README.md containing "Spark" and the lines containing "e".
+   */
+  @Test
+  public void JobTest() {
+    String logFile = "file://" + pwd + "/README.md"
+    String[] jars = [System.getProperty("sparkJar"), org.apache.bigtop.itest.JarContent.getJarURL("groovy.lang.GroovyObject")]
+
+    JavaSparkContext sc = new JavaSparkContext("local", "Simple Job", SPARK_HOME, jars)
+    try {
+      JavaRDD<String> logData = sc.textFile(logFile).cache()
+
+      long num_Spark = logData.filter(new Function<String, Boolean>() {
+        public Boolean call(String s) { return s.contains("Spark") }
+      }).count()
+
+      long num_e = logData.filter(new Function<String, Boolean>() {
+        public Boolean call(String s) { return s.contains("e") }
+      }).count()
+
+      // JUnit expects (message, expected, actual). Long literals keep both
+      // arguments the same type so the comparison is never Long vs Integer.
+      assertEquals("Lines containing 'Spark' should be 14", 14L, num_Spark)
+      assertEquals("Lines containing 'e' should be 43", 43L, num_e)
+    } finally {
+      // Always shut the context down so it does not outlive the test.
+      sc.stop()
+    }
+  }
+
+}
diff --git a/bigtop-tests/test-artifacts/spark/src/main/resources/README.md b/bigtop-tests/test-artifacts/spark/src/main/resources/README.md
new file mode 100644
index 00000000..ba24ab43
--- /dev/null
+++ b/bigtop-tests/test-artifacts/spark/src/main/resources/README.md
@@ -0,0 +1,73 @@
+# Spark
+
+Lightning-Fast Cluster Computing - <http://www.spark-project.org/>
+
+
+## Online Documentation
+
+You can find the latest Spark documentation, including a programming
+guide, on the project webpage at <http://spark-project.org/documentation.html>.
+This README file only contains basic setup instructions.
+
+
+## Building
+
+Spark requires Scala 2.9.2 (Scala 2.10 is not yet supported). The project is
+built using Simple Build Tool (SBT), which is packaged with it. To build
+Spark and its example programs, run:
+
+ sbt/sbt package
+
+Spark also supports building using Maven. If you would like to build using Maven,
+see the [instructions for building Spark with Maven](http://spark-project.org/docs/latest/building-with-maven.html)
+in the spark documentation..
+
+To run Spark, you will need to have Scala's bin directory in your `PATH`, or
+you will need to set the `SCALA_HOME` environment variable to point to where
+you've installed Scala. Scala must be accessible through one of these
+methods on your cluster's worker nodes as well as its master.
+
+To run one of the examples, use `./run <class> <params>`. For example:
+
+ ./run spark.examples.SparkLR local[2]
+
+will run the Logistic Regression example locally on 2 CPUs.
+
+Each of the example programs prints usage help if no params are given.
+
+All of the Spark samples take a `<host>` parameter that is the cluster URL
+to connect to. This can be a mesos:// or spark:// URL, or "local" to run
+locally with one thread, or "local[N]" to run locally with N threads.
+
+
+## A Note About Hadoop Versions
+
+Spark uses the Hadoop core library to talk to HDFS and other Hadoop-supported
+storage systems. Because the HDFS API has changed in different versions of
+Hadoop, you must build Spark against the same version that your cluster runs.
+You can change the version by setting the `HADOOP_VERSION` variable at the top
+of `project/SparkBuild.scala`, then rebuilding Spark.
+
+
+## Configuration
+
+Please refer to the "Configuration" guide in the online documentation for a
+full overview on how to configure Spark. At the minimum, you will need to
+create a `conf/spark-env.sh` script (copy `conf/spark-env.sh.template`) and
+set the following two variables:
+
+- `SCALA_HOME`: Location where Scala is installed.
+
+- `MESOS_NATIVE_LIBRARY`: Your Mesos library (only needed if you want to run
+ on Mesos). For example, this might be `/usr/local/lib/libmesos.so` on Linux.
+
+
+## Contributing to Spark
+
+Contributions via GitHub pull requests are gladly accepted from their original
+author. Along with any pull requests, please state that the contribution is
+your original work and that you license the work to the project under the
+project's open source license. Whether or not you state this explicitly, by
+submitting any copyrighted material via pull request, email, or other means
+you agree to license the material under the project's open source license and
+warrant that you have the legal authority to do so.
diff --git a/bigtop-tests/test-artifacts/spark/src/main/resources/kmeans_data.txt b/bigtop-tests/test-artifacts/spark/src/main/resources/kmeans_data.txt
new file mode 100644
index 00000000..338664f7
--- /dev/null
+++ b/bigtop-tests/test-artifacts/spark/src/main/resources/kmeans_data.txt
@@ -0,0 +1,6 @@
+0.0 0.0 0.0
+0.1 0.1 0.1
+0.2 0.2 0.2
+9.0 9.0 9.0
+9.1 9.1 9.1
+9.2 9.2 9.2
diff --git a/bigtop-tests/test-execution/smokes/spark/pom.xml b/bigtop-tests/test-execution/smokes/spark/pom.xml
new file mode 100644
index 00000000..e7a80d94
--- /dev/null
+++ b/bigtop-tests/test-execution/smokes/spark/pom.xml
@@ -0,0 +1,166 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.bigtop.itest</groupId>
+ <artifactId>smoke-tests</artifactId>
+ <version>0.7.0-SNAPSHOT</version>
+ <relativePath>../pom.xml</relativePath>
+ </parent>
+
+ <groupId>org.apache.bigtop.itest</groupId>
+ <artifactId>spark-smoke-execution</artifactId>
+ <version>0.7.0-SNAPSHOT</version>
+ <name>Spark smoke test execution</name>
+
+ <properties>
+ <!-- SPARK_HOME and SPARK_MASTER are populated from the environment variables
+ of the same name. -->
+ <SPARK_HOME>${env.SPARK_HOME}</SPARK_HOME>
+ <SPARK_MASTER>${env.SPARK_MASTER}</SPARK_MASTER>
+ <!-- Coordinates of the spark-smoke test artifact. The groupId and artifactId
+ values are reused for the explicit dependency in this POM, and the
+ artifactId, version and type values are used to build the sparkJar path
+ passed to the tests. NOTE(review): the output and pattern values are
+ presumably consumed by maven-dependency-plugin configuration inherited
+ from the parent POM. Confirm. -->
+ <org.apache.maven-dependency-plugin.groupId>org.apache.bigtop.itest</org.apache.maven-dependency-plugin.groupId>
+ <org.apache.maven-dependency-plugin.artifactId>spark-smoke</org.apache.maven-dependency-plugin.artifactId>
+ <org.apache.maven-dependency-plugin.version>${spark-smoke.version}</org.apache.maven-dependency-plugin.version>
+ <org.apache.maven-dependency-plugin.output>${project.build.directory}
+ </org.apache.maven-dependency-plugin.output>
+ <org.apache.maven-dependency-plugin.pattern>**/*</org.apache.maven-dependency-plugin.pattern>
+ <org.apache.maven-dependency-plugin.type>jar</org.apache.maven-dependency-plugin.type>
+ </properties>
+
+ <!-- Extra repositories; both serve releases only (snapshots are disabled). -->
+ <repositories>
+ <repository>
+ <id>akka-repo</id>
+ <name>Akka Repository</name>
+ <url>http://repo.akka.io/releases/</url>
+ <releases>
+ <enabled>true</enabled>
+ </releases>
+ <snapshots>
+ <enabled>false</enabled>
+ </snapshots>
+ </repository>
+ <repository>
+ <id>spray-repo</id>
+ <name>Spray Repository</name>
+ <url>http://repo.spray.cc/</url>
+ <releases>
+ <enabled>true</enabled>
+ </releases>
+ <snapshots>
+ <enabled>false</enabled>
+ </snapshots>
+ </repository>
+ </repositories>
+
+ <dependencies>
+ <dependency>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging</artifactId>
+ <version>1.1</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.cxf</groupId>
+ <artifactId>cxf-rt-frontend-jaxrs</artifactId>
+ <version>2.5.0</version>
+ </dependency>
+ <!-- The asm:asm transitive dependency of hadoop-common is excluded. -->
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>asm</groupId>
+ <artifactId>asm</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
+ <version>${hadoop.version}</version>
+ </dependency>
+ <!-- An explicit dependency on the spark-smoke test artifact is needed to pull
+ down its transitive dependencies. -->
+ <dependency>
+ <groupId>${org.apache.maven-dependency-plugin.groupId}</groupId>
+ <artifactId>${org.apache.maven-dependency-plugin.artifactId}</artifactId>
+ <version>${spark-smoke.version}</version>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+
+ <!-- Declared without configuration here. NOTE(review): presumably configured
+ through the parent POM using the org.apache.maven-dependency-plugin.*
+ properties of this POM. Confirm. -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-dependency-plugin</artifactId>
+ </plugin>
+
+ <!-- Test execution with forkMode set to always. The sparkJar system property
+ is set to the path of the spark-smoke jar under the build directory. -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-failsafe-plugin</artifactId>
+ <version>2.11</version>
+ <configuration>
+ <forkMode>always</forkMode>
+ <systemPropertyVariables>
+ <sparkJar>
+ ${project.build.directory}/${org.apache.maven-dependency-plugin.artifactId}-${org.apache.maven-dependency-plugin.version}.${org.apache.maven-dependency-plugin.type}
+ </sparkJar>
+ </systemPropertyVariables>
+ </configuration>
+ </plugin>
+
+ <!-- Fails the build (fail is true) unless the SPARK_HOME, SPARK_MASTER and
+ HADOOP_CONF_DIR properties are set. NOTE(review): HADOOP_CONF_DIR is not
+ defined in the properties section of this POM; presumably it is inherited
+ from the parent POM. Confirm. -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-enforcer-plugin</artifactId>
+ <version>1.0</version>
+ <executions>
+ <execution>
+ <id>enforce-property</id>
+ <goals>
+ <goal>enforce</goal>
+ </goals>
+ <configuration>
+ <rules>
+ <requireProperty>
+ <property>SPARK_HOME</property>
+ <message>SPARK_HOME env. variable has to be set</message>
+ </requireProperty>
+ <requireProperty>
+ <property>SPARK_MASTER</property>
+ <message>SPARK_MASTER env. variable has to be set</message>
+ </requireProperty>
+ <requireProperty>
+ <property>HADOOP_CONF_DIR</property>
+ <message>HADOOP_CONF_DIR env. variable has to be set</message>
+ </requireProperty>
+ </rules>
+ <fail>true</fail>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+
+ </plugins>
+ </build>
+</project>
diff --git a/pom.xml b/pom.xml
index f89b3fa5..0ac1d496 100644
--- a/pom.xml
+++ b/pom.xml
@@ -44,6 +44,7 @@
<zookeeper.version>3.4.5</zookeeper.version>
<giraph.version>0.2-SNAPSHOT</giraph.version>
<solr.version>4.2.1</solr.version>
+ <spark-smoke.version>${project.version}</spark-smoke.version>
<itest-common.version>${project.version}</itest-common.version>