diff options
author | Henry Wang <henry.wang@wandisco.com> | 2013-09-10 16:41:43 -0700 |
---|---|---|
committer | Konstantin Boudnik <cos@apache.org> | 2013-09-10 16:41:43 -0700 |
commit | 95bac804ea5f98c5d448e9a404604d825fb50af8 (patch) | |
tree | de7b7071de513a778445548bb673235979a67cdc | |
parent | 46b28b00d2dccdfe491c98a5d3cb102e72733adc (diff) |
BIGTOP-1030. Develop integration tests for new Spark component
Signed-off-by: Konstantin Boudnik <cos@apache.org>
7 files changed, 405 insertions, 0 deletions
diff --git a/bigtop-tests/test-artifacts/pom.xml b/bigtop-tests/test-artifacts/pom.xml index 9f46627c..bcbdf590 100644 --- a/bigtop-tests/test-artifacts/pom.xml +++ b/bigtop-tests/test-artifacts/pom.xml @@ -48,6 +48,7 @@ <module>datafu</module> <module>fatjar</module> <module>hcatalog</module> + <module>spark</module> </modules> <dependencies> diff --git a/bigtop-tests/test-artifacts/spark/pom.xml b/bigtop-tests/test-artifacts/spark/pom.xml new file mode 100644 index 00000000..a29f87fa --- /dev/null +++ b/bigtop-tests/test-artifacts/spark/pom.xml @@ -0,0 +1,60 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <parent> + <groupId>org.apache.bigtop.itest</groupId> + <artifactId>bigtop-smokes</artifactId> + <version>0.7.0-SNAPSHOT</version> + <relativePath>../pom.xml</relativePath> + </parent> + + <groupId>org.apache.bigtop.itest</groupId> + <artifactId>spark-smoke</artifactId> + <version>0.7.0-SNAPSHOT</version> + <name>sparksmoke</name> + + <repositories> + <repository> + <id>akka-repo</id> + <name>Akka Repository</name> + <url>http://repo.akka.io/releases/</url> + <releases> + <enabled>true</enabled> + </releases> + <snapshots> + <enabled>false</enabled> + </snapshots> + </repository> + <repository> + <id>spray-repo</id> + <name>Spray Repository</name> + <url>http://repo.spray.cc/</url> + <releases> + <enabled>true</enabled> + </releases> + <snapshots> + <enabled>false</enabled> + </snapshots> + </repository> + </repositories> + + <dependencies> + <dependency> + <groupId>org.spark-project</groupId> + <artifactId>spark-core</artifactId> + <version>0.8.0-SNAPSHOT</version> + <classifier>hadoop2-yarn</classifier> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-hdfs</artifactId> + 
</dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-common</artifactId> + </dependency> + </dependencies> +</project> diff --git a/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkSmoke.groovy b/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkSmoke.groovy new file mode 100644 index 00000000..414ec9c4 --- /dev/null +++ b/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkSmoke.groovy @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.bigtop.itest.spark + +import org.apache.bigtop.itest.shell.Shell +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.FileSystem +import org.apache.hadoop.fs.Path + +import org.junit.Test +import org.junit.BeforeClass +import static org.junit.Assert.assertEquals + +import static org.apache.bigtop.itest.LogErrorsUtils.logError + +import spark.api.java.* +import spark.api.java.function.Function + +public class TestSparkSmoke implements Serializable { + + private static String SPARK_HOME = System.getenv("SPARK_HOME") + private static String SPARK_MASTER = System.getenv("SPARK_MASTER") + private static String USER = System.getProperty("user.name") + private static String pwd = "" + private static Configuration conf + static Shell sh = new Shell("/bin/bash -s") + def result = ["0.2: 3", "0.1: 3", "0.0: 3", "9.0: 3", "9.2: 3", "9.1: 3"] + + @BeforeClass + static void setUp() { + sh.exec("pwd") + pwd = sh.out + int lastIndex = pwd.length() - 1 + pwd = pwd.substring(1, lastIndex) + } + + @Test + void ShellTest() { + String kmeans = "file://" + pwd + "/kmeans_data.txt" + sh.exec("cd ${SPARK_HOME} && ./spark-class org.apache.spark.examples.JavaWordCount local " + kmeans) + logError(sh) + assertEquals(result, sh.out) + } + + @Test + public void HDFSTest() { + conf = new Configuration() + String fs_default_name = conf.get("fs.defaultFS") + FileSystem fs = FileSystem.get(conf) + String pathname = "/user/${USER}/kmeans_data.txt" + fs.copyFromLocalFile(new Path("kmeans_data.txt"), new Path(pathname)) + fs.close() + + String dfsname = fs_default_name + pathname + sh.exec("cd ${SPARK_HOME} && ./spark-class org.apache.spark.examples.JavaWordCount ${SPARK_MASTER} " + dfsname) + logError(sh) + assertEquals(result, sh.out) + } + + @Test + public void JobTest() { + String logFile = "file://" + pwd + "/README.md"; + String[] jars = [System.getProperty("sparkJar"), org.apache.bigtop.itest.JarContent.getJarURL("groovy.lang.GroovyObject")]; + 
+ + JavaSparkContext sc = new JavaSparkContext("local", "Simple Job", + SPARK_HOME, jars); + + JavaRDD<String> logData = sc.textFile(logFile).cache(); + + long num_Spark = logData.filter(new Function<String, Boolean>() { + public Boolean call(String s) { return s.contains("Spark"); } + }).count(); + + long num_e = logData.filter(new Function<String, Boolean>() { + public Boolean call(String s) { return s.contains("e"); } + }).count(); + + assertEquals("Lines containing 'spark' should be 14", num_Spark, 14); + assertEquals("Lines containing 'e' should be 43", num_e, 43); + } + +} diff --git a/bigtop-tests/test-artifacts/spark/src/main/resources/README.md b/bigtop-tests/test-artifacts/spark/src/main/resources/README.md new file mode 100644 index 00000000..ba24ab43 --- /dev/null +++ b/bigtop-tests/test-artifacts/spark/src/main/resources/README.md @@ -0,0 +1,73 @@ +# Spark + +Lightning-Fast Cluster Computing - <http://www.spark-project.org/> + + +## Online Documentation + +You can find the latest Spark documentation, including a programming +guide, on the project webpage at <http://spark-project.org/documentation.html>. +This README file only contains basic setup instructions. + + +## Building + +Spark requires Scala 2.9.2 (Scala 2.10 is not yet supported). The project is +built using Simple Build Tool (SBT), which is packaged with it. To build +Spark and its example programs, run: + + sbt/sbt package + +Spark also supports building using Maven. If you would like to build using Maven, +see the [instructions for building Spark with Maven](http://spark-project.org/docs/latest/building-with-maven.html) +in the Spark documentation. + +To run Spark, you will need to have Scala's bin directory in your `PATH`, or +you will need to set the `SCALA_HOME` environment variable to point to where +you've installed Scala. Scala must be accessible through one of these +methods on your cluster's worker nodes as well as its master. 
+ +To run one of the examples, use `./run <class> <params>`. For example: + + ./run spark.examples.SparkLR local[2] + +will run the Logistic Regression example locally on 2 CPUs. + +Each of the example programs prints usage help if no params are given. + +All of the Spark samples take a `<host>` parameter that is the cluster URL +to connect to. This can be a mesos:// or spark:// URL, or "local" to run +locally with one thread, or "local[N]" to run locally with N threads. + + +## A Note About Hadoop Versions + +Spark uses the Hadoop core library to talk to HDFS and other Hadoop-supported +storage systems. Because the HDFS API has changed in different versions of +Hadoop, you must build Spark against the same version that your cluster runs. +You can change the version by setting the `HADOOP_VERSION` variable at the top +of `project/SparkBuild.scala`, then rebuilding Spark. + + +## Configuration + +Please refer to the "Configuration" guide in the online documentation for a +full overview on how to configure Spark. At the minimum, you will need to +create a `conf/spark-env.sh` script (copy `conf/spark-env.sh.template`) and +set the following two variables: + +- `SCALA_HOME`: Location where Scala is installed. + +- `MESOS_NATIVE_LIBRARY`: Your Mesos library (only needed if you want to run + on Mesos). For example, this might be `/usr/local/lib/libmesos.so` on Linux. + + +## Contributing to Spark + +Contributions via GitHub pull requests are gladly accepted from their original +author. Along with any pull requests, please state that the contribution is +your original work and that you license the work to the project under the +project's open source license. Whether or not you state this explicitly, by +submitting any copyrighted material via pull request, email, or other means +you agree to license the material under the project's open source license and +warrant that you have the legal authority to do so. 
diff --git a/bigtop-tests/test-artifacts/spark/src/main/resources/kmeans_data.txt b/bigtop-tests/test-artifacts/spark/src/main/resources/kmeans_data.txt new file mode 100644 index 00000000..338664f7 --- /dev/null +++ b/bigtop-tests/test-artifacts/spark/src/main/resources/kmeans_data.txt @@ -0,0 +1,6 @@ +0.0 0.0 0.0 +0.1 0.1 0.1 +0.2 0.2 0.2 +9.0 9.0 9.0 +9.1 9.1 9.1 +9.2 9.2 9.2 diff --git a/bigtop-tests/test-execution/smokes/spark/pom.xml b/bigtop-tests/test-execution/smokes/spark/pom.xml new file mode 100644 index 00000000..e7a80d94 --- /dev/null +++ b/bigtop-tests/test-execution/smokes/spark/pom.xml @@ -0,0 +1,166 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <parent> + <groupId>org.apache.bigtop.itest</groupId> + <artifactId>smoke-tests</artifactId> + <version>0.7.0-SNAPSHOT</version> + <relativePath>../pom.xml</relativePath> + </parent> + + <groupId>org.apache.bigtop.itest</groupId> + <artifactId>spark-smoke-execution</artifactId> + <version>0.7.0-SNAPSHOT</version> + <name>Spark smoke test execution</name> + + <properties> + <SPARK_HOME>${env.SPARK_HOME}</SPARK_HOME> + <SPARK_MASTER>${env.SPARK_MASTER}</SPARK_MASTER> + <org.apache.maven-dependency-plugin.groupId>org.apache.bigtop.itest</org.apache.maven-dependency-plugin.groupId> + <org.apache.maven-dependency-plugin.artifactId>spark-smoke</org.apache.maven-dependency-plugin.artifactId> + <org.apache.maven-dependency-plugin.version>${spark-smoke.version}</org.apache.maven-dependency-plugin.version> + <org.apache.maven-dependency-plugin.output>${project.build.directory} + </org.apache.maven-dependency-plugin.output> + <org.apache.maven-dependency-plugin.pattern>**/*</org.apache.maven-dependency-plugin.pattern> + <org.apache.maven-dependency-plugin.type>jar</org.apache.maven-dependency-plugin.type> + </properties> + + <repositories> + <repository> + <id>akka-repo</id> + <name>Akka Repository</name> + <url>http://repo.akka.io/releases/</url> + <releases> + <enabled>true</enabled> + </releases> + <snapshots> + <enabled>false</enabled> + </snapshots> + </repository> + <repository> + <id>spray-repo</id> + <name>Spray Repository</name> + <url>http://repo.spray.cc/</url> + <releases> + <enabled>true</enabled> + </releases> + <snapshots> + <enabled>false</enabled> + </snapshots> + </repository> + </repositories> + + <dependencies> + <dependency> + <groupId>commons-logging</groupId> + <artifactId>commons-logging</artifactId> 
+ <version>1.1</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.apache.cxf</groupId> + <artifactId>cxf-rt-frontend-jaxrs</artifactId> + <version>2.5.0</version> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-common</artifactId> + <exclusions> + <exclusion> + <groupId>asm</groupId> + <artifactId>asm</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-hdfs</artifactId> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-mapreduce-client-jobclient</artifactId> + <version>${hadoop.version}</version> + </dependency> + <!-- explicit dependency is needed to pull down transient deps. such as hadoop-examples --> + <dependency> + <groupId>${org.apache.maven-dependency-plugin.groupId}</groupId> + <artifactId>${org.apache.maven-dependency-plugin.artifactId}</artifactId> + <version>${spark-smoke.version}</version> + </dependency> + </dependencies> + + <build> + <plugins> + + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-dependency-plugin</artifactId> + </plugin> + + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-failsafe-plugin</artifactId> + <version>2.11</version> + <configuration> + <forkMode>always</forkMode> + <systemPropertyVariables> + <sparkJar> + ${project.build.directory}/${org.apache.maven-dependency-plugin.artifactId}-${org.apache.maven-dependency-plugin.version}.${org.apache.maven-dependency-plugin.type} + </sparkJar> + </systemPropertyVariables> + </configuration> + </plugin> + + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-enforcer-plugin</artifactId> + <version>1.0</version> + <executions> + <execution> + <id>enforce-property</id> + <goals> + <goal>enforce</goal> + </goals> + <configuration> + <rules> + <requireProperty> + <property>SPARK_HOME</property> + <message>SPARK_HOME env. 
variable has to be set</message> + </requireProperty> + <requireProperty> + <property>SPARK_MASTER</property> + <message>SPARK_MASTER env. variable has to be set</message> + </requireProperty> + <requireProperty> + <property>HADOOP_CONF_DIR</property> + <message>HADOOP_CONF_DIR env. variable has to be set</message> + </requireProperty> + </rules> + <fail>true</fail> + </configuration> + </execution> + </executions> + </plugin> + + </plugins> + </build> +</project> @@ -44,6 +44,7 @@ <zookeeper.version>3.4.5</zookeeper.version> <giraph.version>0.2-SNAPSHOT</giraph.version> <solr.version>4.2.1</solr.version> + <spark-smoke.version>${project.version}</spark-smoke.version> <itest-common.version>${project.version}</itest-common.version> |