diff options
author | Youngwoo Kim <warwithin@gmail.com> | 2015-02-08 18:50:50 +0900 |
---|---|---|
committer | jayunit100 <jay@apache.org> | 2015-03-01 22:56:14 -0500 |
commit | e4288e08d7bab850d8cf4f46fb5f8cd9f2a397ca (patch) | |
tree | 605fa5fdad577e9afd93bbd94122ac06e23878e3 | |
parent | d32c468e8081f13c857c26b3d9ad1e0a5ad1ac38 (diff) |
BIGTOP-1648. Update Spark 1.x to Spark 1.2.1.
Signed-off-by: jayunit100 <jay@apache.org>
17 files changed, 321 insertions, 191 deletions
diff --git a/bigtop-packages/src/common/hadoop/init-hcfs.json b/bigtop-packages/src/common/hadoop/init-hcfs.json index d8825aa3..bd97a220 100644 --- a/bigtop-packages/src/common/hadoop/init-hcfs.json +++ b/bigtop-packages/src/common/hadoop/init-hcfs.json @@ -86,7 +86,8 @@ ["/user/oozie/share/lib/hive", null, null, null], ["/user/oozie/share/lib/mapreduce-streaming", null, null, null], ["/user/oozie/share/lib/distcp", null, null, null], - ["/user/oozie/share/lib/pig", null, null, null] + ["/user/oozie/share/lib/pig", null, null, null], + ["/var/log/spark/apps","1777","spark","spark"] ], "user": [ ["tom", "0755", null], diff --git a/bigtop-packages/src/common/hadoop/init-hdfs.sh b/bigtop-packages/src/common/hadoop/init-hdfs.sh index 3a5fe361..1bf820f6 100755 --- a/bigtop-packages/src/common/hadoop/init-hdfs.sh +++ b/bigtop-packages/src/common/hadoop/init-hdfs.sh @@ -69,6 +69,11 @@ su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /user/oozie/share/lib/hive' su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /user/oozie/share/lib/mapreduce-streaming' su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /user/oozie/share/lib/distcp' su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /user/oozie/share/lib/pig' +# Event log directory for Apache Spark +su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir -p /var/log/spark/apps' +su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chmod -R 1777 /var/log/spark/apps' +su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chown spark:spark /var/log/spark/apps' + # Copy over files from local filesystem to HDFS that oozie might need if ls /usr/lib/hive/lib/*.jar &> /dev/null; then su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -put /usr/lib/hive/lib/*.jar /user/oozie/share/lib/hive' diff --git a/bigtop-packages/src/common/spark/compute-classpath.sh b/bigtop-packages/src/common/spark/compute-classpath.sh deleted file mode 100644 index eb6a8076..00000000 --- a/bigtop-packages/src/common/spark/compute-classpath.sh +++ /dev/null @@ -1,74 +0,0 @@ 
-#!/bin/bash -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This script computes Spark's classpath and prints it to stdout; it's used by both the "run" -# script and the ExecutorRunner in standalone cluster mode. - -SCALA_VERSION=2.10 - -# Figure out where Spark is installed -FWDIR="$(cd `dirname $0`/..; pwd)" - -# Load environment variables from conf/spark-env.sh, if it exists -if [ -e $FWDIR/conf/spark-env.sh ] ; then - . $FWDIR/conf/spark-env.sh -fi - -CORE_DIR="$FWDIR/core" -ASSEMBLY_DIR="$FWDIR/lib" -PYSPARK_DIR="$FWDIR/python" - -# Build up classpath -CLASSPATH="$SPARK_CLASSPATH" -CLASSPATH="$CLASSPATH:$FWDIR/conf" -CLASSPATH="$CLASSPATH:$ASSEMBLY_DIR/*" -if [ -e "$PYSPARK_DIR" ]; then - for jar in `find $PYSPARK_DIR/lib -name '*jar'`; do - CLASSPATH="$CLASSPATH:$jar" - done -fi - -# Add hadoop conf dir - else FileSystem.*, etc fail ! -# Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts -# the configuration files. 
- -export DEFAULT_HADOOP=/usr/lib/hadoop -export DEFAULT_HADOOP_CONF=/etc/hadoop/conf -export HADOOP_HOME=${HADOOP_HOME:-$DEFAULT_HADOOP} -export HADOOP_HDFS_HOME=${HADOOP_HDFS_HOME:-${HADOOP_HOME}/../hadoop-hdfs} -export HADOOP_MAPRED_HOME=${HADOOP_MAPRED_HOME:-${HADOOP_HOME}/../hadoop-mapreduce} -export HADOOP_YARN_HOME=${HADOOP_YARN_HOME:-${HADOOP_HOME}/../hadoop-yarn} -export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-$DEFAULT_HADOOP_CONF} - -CLASSPATH="$CLASSPATH:$HADOOP_CONF_DIR" -if [ "x" != "x$YARN_CONF_DIR" ]; then - CLASSPATH="$CLASSPATH:$YARN_CONF_DIR" -fi -# Let's make sure that all needed hadoop libs are added properly -CLASSPATH="$CLASSPATH:$HADOOP_HOME/*:$HADOOP_HDFS_HOME/*:$HADOOP_YARN_HOME/*:$HADOOP_MAPRED_HOME/*" -# Add Scala standard library -if [ -z "$SCALA_LIBRARY_PATH" ]; then - if [ -z "$SCALA_HOME" ]; then - echo "SCALA_HOME is not set" >&2 - exit 1 - fi - SCALA_LIBRARY_PATH="$SCALA_HOME/lib" -fi -CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/scala-library.jar" -CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/scala-compiler.jar" -CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/jline.jar" - -echo "$CLASSPATH" diff --git a/bigtop-packages/src/common/spark/do-component-build b/bigtop-packages/src/common/spark/do-component-build index 5327c35d..deecbe2e 100644 --- a/bigtop-packages/src/common/spark/do-component-build +++ b/bigtop-packages/src/common/spark/do-component-build @@ -23,9 +23,6 @@ if [ "x$SCALA_HOME" = "x" ]; then exit 2 fi -# FIXME: this is fixed in Spark 1.x -sed -i -e '/<dependencies>/a<dependency><groupId>commons-cli</groupId><artifactId>commons-cli</artifactId><version>1.2</version></dependency>' assembly/pom.xml - BUILD_OPTS="-Divy.home=${HOME}/.ivy2 -Dsbt.ivy.home=${HOME}/.ivy2 -Duser.home=${HOME} \ -Drepo.maven.org=$IVY_MIRROR_PROP \ -Dreactor.repo=file://${HOME}/.m2/repository \ @@ -37,4 +34,4 @@ BUILD_OPTS="-Divy.home=${HOME}/.ivy2 -Dsbt.ivy.home=${HOME}/.ivy2 -Duser.home=${ # 
http://maven.40175.n5.nabble.com/Not-finding-artifact-in-local-repo-td3727753.html export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m -XX:PermSize=1024m -XX:MaxPermSize=1024m" -mvn -Pbigtop-dist -Pyarn -Phive $BUILD_OPTS install +mvn -Pbigtop-dist -Pyarn -Phive -Phive-thriftserver $BUILD_OPTS install diff --git a/bigtop-packages/src/common/spark/install_spark.sh b/bigtop-packages/src/common/spark/install_spark.sh index 3dfd2f6e..e28edd3b 100644 --- a/bigtop-packages/src/common/spark/install_spark.sh +++ b/bigtop-packages/src/common/spark/install_spark.sh @@ -125,6 +125,7 @@ install -d -m 0755 $PREFIX/$LIB_DIR/lib install -d -m 0755 $PREFIX/$LIB_DIR/bin install -d -m 0755 $PREFIX/$LIB_DIR/sbin install -d -m 0755 $PREFIX/$DOC_DIR +install -d -m 0755 $PREFIX/$EXAMPLES_DIR install -d -m 0755 $PREFIX/var/lib/spark/ install -d -m 0755 $PREFIX/var/log/spark/ @@ -139,19 +140,18 @@ tar --wildcards -C $PREFIX/$LIB_DIR/ -zxf ${BUILD_DIR}/assembly/target/spark-ass rm -rf $PREFIX/$LIB_DIR/bin/*.cmd # Examples jar -cp ${BUILD_DIR}/examples/target/spark-examples*${SPARK_VERSION}.jar $PREFIX/$LIB_DIR/lib -sed -i -e "s|lib/spark-examples-\*hadoop\*.jar|lib/spark-examples_\*.jar|" $PREFIX/$LIB_DIR/bin/run-example +cp ${BUILD_DIR}/examples/target/spark-examples*${SPARK_VERSION}.jar $PREFIX/$LIB_DIR/lib/spark-examples-${SPARK_VERSION}-hadoop${HADOOP_VERSION}.jar # Examples src -install -d -m 0755 $PREFIX/$EXAMPLES_DIR cp -ra ${BUILD_DIR}/examples/src $PREFIX/$EXAMPLES_DIR/ +ln -s $EXAMPLES_DIR $PREFIX/$LIB_DIR/examples + +# Data +cp -ra ${BUILD_DIR}/data $PREFIX/$LIB_DIR/ chmod 755 $PREFIX/$LIB_DIR/bin/* chmod 755 $PREFIX/$LIB_DIR/sbin/* -cp -a ${SOURCE_DIR}/compute-classpath.sh $PREFIX/$LIB_DIR/bin/ -chmod 755 $PREFIX/$LIB_DIR/bin/compute-classpath.sh - # Copy in the configuration files install -d -m 0755 $PREFIX/$CONF_DIR cp -a ${BUILD_DIR}/conf/* $PREFIX/$CONF_DIR @@ -162,7 +162,7 @@ ln -s /etc/spark/conf $PREFIX/$LIB_DIR/conf install -d -m 0755 $PREFIX/$BIN_DIR for wrap in 
sbin/spark-executor bin/spark-shell bin/spark-submit; do cat > $PREFIX/$BIN_DIR/`basename $wrap` <<EOF -#!/bin/bash +#!/bin/bash # Autodetect JAVA_HOME if not defined . /usr/lib/bigtop-utils/bigtop-detect-javahome @@ -173,6 +173,7 @@ EOF done cat >> $PREFIX/$CONF_DIR/spark-env.sh <<EOF +export SPARK_SCALA_VERSION=2.10 ### Let's run everything with JVM runtime, instead of Scala export SPARK_LAUNCH_WITH_SCALA=0 @@ -184,6 +185,16 @@ export SPARK_WORKER_PORT=7078 export SPARK_WORKER_WEBUI_PORT=18081 export SPARK_WORKER_DIR=/var/run/spark/work export SPARK_LOG_DIR=/var/log/spark +export SPARK_HISTORY_OPTS="\$SPARK_HISTORY_OPTS -Dspark.history.fs.logDirectory=hdfs:///var/log/spark/apps -Dspark.history.ui.port=18082" + +export HADOOP_HOME=\${HADOOP_HOME:-/usr/lib/hadoop} +export HADOOP_HDFS_HOME=\${HADOOP_HDFS_HOME:-\${HADOOP_HOME}/../hadoop-hdfs} +export HADOOP_MAPRED_HOME=\${HADOOP_MAPRED_HOME:-\${HADOOP_HOME}/../hadoop-mapreduce} +export HADOOP_YARN_HOME=\${HADOOP_YARN_HOME:-\${HADOOP_HOME}/../hadoop-yarn} +export HADOOP_CONF_DIR=\${HADOOP_CONF_DIR:-/etc/hadoop/conf} + +# Let's make sure that all needed hadoop libs are added properly +CLASSPATH="\$CLASSPATH:\$HADOOP_HOME/*:\$HADOOP_HDFS_HOME/*:\$HADOOP_YARN_HOME/*:\$HADOOP_MAPRED_HOME/*" if [ -n "\$HADOOP_HOME" ]; then export SPARK_LIBRARY_PATH=\$SPARK_LIBRARY_PATH:\${HADOOP_HOME}/lib/native @@ -201,6 +212,7 @@ EOF ln -s /var/run/spark/work $PREFIX/$LIB_DIR/work cp -r ${BUILD_DIR}/python ${PREFIX}/${INSTALLED_LIB_DIR}/ +rm -f ${PREFIX}/${INSTALLED_LIB_DIR}/python/.gitignore cat > $PREFIX/$BIN_DIR/pyspark <<EOF #!/bin/bash @@ -214,5 +226,7 @@ EOF chmod 755 $PREFIX/$BIN_DIR/pyspark touch $PREFIX/$LIB_DIR/RELEASE - cp ${BUILD_DIR}/{LICENSE,NOTICE} ${PREFIX}/${LIB_DIR}/ + +# Version-less symlinks +(cd $PREFIX/$LIB_DIR/lib; ln -s spark-assembly*.jar spark-assembly.jar; ln -s spark-examples*.jar spark-examples.jar) diff --git a/bigtop-packages/src/common/spark/spark-history-server.svc 
b/bigtop-packages/src/common/spark/spark-history-server.svc new file mode 100644 index 00000000..0410d160 --- /dev/null +++ b/bigtop-packages/src/common/spark/spark-history-server.svc @@ -0,0 +1,70 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +TYPE="history-server" +DAEMON="spark-${TYPE}" +DESC="Spark ${TYPE}" +EXEC_PATH="/usr/lib/spark/bin/spark-class" +SVC_USER="spark" +WORKING_DIR="/var/lib/spark" +DAEMON_FLAGS="" +CONF_DIR="/etc/spark/conf" +PIDFILE="/var/run/spark/${DAEMON}.pid" + +generate_start() { + +cat <<'__EOT__' +start() { + [ -x $EXE_FILE ] || exit $ERROR_PROGRAM_NOT_INSTALLED + log_success_msg "Starting $DESC (${DAEMON}): " + + checkstatusofproc + status=$? + if [ "$status" -eq "$STATUS_RUNNING" ]; then + log_success_msg "${DESC} is running" + exit 0 + fi + + LOG_FILE=/var/log/spark/${DAEMON}.out + + su -s /bin/bash $SVC_USER -c "nohup nice -n 0 \ + ${EXEC_PATH} org.apache.spark.deploy.history.HistoryServer $DAEMON_FLAGS \ + > $LOG_FILE 2>&1 & "'echo $!' > "$PIDFILE" + + sleep 3 + + checkstatusofproc + RETVAL=$? 
+ [ $RETVAL -eq $STATUS_RUNNING ] && touch $LOCKFILE + return $RETVAL +} +__EOT__ + +} + +generate_stop() { + +cat <<'__EOT__' +stop() { + log_success_msg "Stopping $DESC (${DAEMON}): " + killproc -p $PIDFILE java + RETVAL=$? + + [ $RETVAL -eq $RETVAL_SUCCESS ] && rm -f $LOCKFILE $PIDFILE + return $RETVAL +} +__EOT__ + +} diff --git a/bigtop-packages/src/common/spark/spark-thriftserver.svc b/bigtop-packages/src/common/spark/spark-thriftserver.svc new file mode 100644 index 00000000..4eba4a4a --- /dev/null +++ b/bigtop-packages/src/common/spark/spark-thriftserver.svc @@ -0,0 +1,72 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +TYPE="thriftserver" +DAEMON="spark-${TYPE}" +DESC="Spark ${TYPE}" +EXEC_PATH="/usr/lib/spark/sbin/spark-daemon.sh" +SVC_USER="spark" +WORKING_DIR="/var/lib/spark" +DAEMON_FLAGS="" +CONF_DIR="/etc/spark/conf" +PIDFILE="/var/run/spark/${DAEMON}.pid" + +generate_start() { + +cat <<'__EOT__' +start() { + [ -x $EXE_FILE ] || exit $ERROR_PROGRAM_NOT_INSTALLED + log_success_msg "Starting $DESC (${DAEMON}): " + + checkstatusofproc + status=$? 
+ if [ "$status" -eq "$STATUS_RUNNING" ]; then + log_success_msg "${DESC} is running" + exit 0 + fi + + function usage { + echo + } + export SUBMIT_USAGE_FUNCTION=usage + + su -s /bin/bash $SVC_USER -c " \ + ${EXEC_PATH} spark-submit org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 1 $DAEMON_FLAGS" + + sleep 3 + + checkstatusofproc + RETVAL=$? + [ $RETVAL -eq $STATUS_RUNNING ] && touch $LOCKFILE + return $RETVAL +} +__EOT__ + +} + +generate_stop() { + +cat <<'__EOT__' +stop() { + su -s /bin/bash $SVC_USER -c " \ + ${EXEC_PATH} stop org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 1" + RETVAL=$? + + [ $RETVAL -eq $RETVAL_SUCCESS ] && rm -f $LOCKFILE $PIDFILE + return $RETVAL +} +__EOT__ + +} diff --git a/bigtop-packages/src/deb/spark/control b/bigtop-packages/src/deb/spark/control index ae2f07eb..856d622e 100644 --- a/bigtop-packages/src/deb/spark/control +++ b/bigtop-packages/src/deb/spark/control @@ -23,7 +23,7 @@ Homepage: http://spark.apache.org/ Package: spark-core Architecture: all -Depends: bigtop-utils (>= 0.7), hadoop-client +Depends: adduser, bigtop-utils (>= 0.7), hadoop-client Description: Lightning-Fast Cluster Computing Spark is a MapReduce-like cluster computing framework designed to support low-latency iterative jobs and interactive use from an interpreter. 
It is @@ -48,3 +48,15 @@ Architecture: all Depends: spark-core (= ${source:Version}), python Description: Python client for Spark Includes PySpark, an interactive Python shell for Spark, and related libraries + +Package: spark-history-server +Architecture: all +Depends: spark-core (= ${source:Version}) +Description: History server for Apache Spark + History server for Apache Spark + +Package: spark-thriftserver +Architecture: all +Depends: spark-core (= ${source:Version}) +Description: Thrift server for Spark SQL + Thrift server for Spark SQL diff --git a/bigtop-packages/src/deb/spark/rules b/bigtop-packages/src/deb/spark/rules index a21ba820..58815189 100644 --- a/bigtop-packages/src/deb/spark/rules +++ b/bigtop-packages/src/deb/spark/rules @@ -28,7 +28,7 @@ export DH_OPTIONS override_dh_auto_build: bash debian/do-component-build -svcs=spark-master spark-worker +svcs=spark-master spark-worker spark-history-server spark-thriftserver $(svcs): debian/init.d.tmpl bash $< debian/$@.svc deb debian/$@.init @@ -36,7 +36,7 @@ $(svcs): debian/init.d.tmpl override_dh_auto_install: $(svcs) bash -x debian/install_spark.sh \ - --build-dir=`pwd` \ - --doc-dir=/usr/share/doc/spark \ - --source-dir=debian \ - --prefix=debian/tmp + --build-dir=`pwd` \ + --doc-dir=/usr/share/doc/spark \ + --source-dir=debian \ + --prefix=debian/tmp diff --git a/bigtop-packages/src/deb/spark/spark-core.install b/bigtop-packages/src/deb/spark/spark-core.install index d1efdfdb..77a6bd8b 100644 --- a/bigtop-packages/src/deb/spark/spark-core.install +++ b/bigtop-packages/src/deb/spark/spark-core.install @@ -2,16 +2,24 @@ /usr/bin/spark-executor /usr/bin/spark-submit /usr/bin/spark-shell -/usr/lib/spark/bin/ -/usr/lib/spark/lib -/usr/lib/spark/sbin -/usr/lib/spark/ui-resources -/usr/lib/spark/conf /usr/lib/spark/LICENSE -/usr/lib/spark/NOTICE /usr/lib/spark/RELEASE -/usr/share/doc/spark +/usr/lib/spark/NOTICE +/usr/lib/spark/bin/beeline +/usr/lib/spark/bin/compute-classpath.sh 
+/usr/lib/spark/bin/load-spark-env.sh +/usr/lib/spark/bin/run-example +/usr/lib/spark/bin/spark-class +/usr/lib/spark/bin/spark-shell +/usr/lib/spark/bin/spark-sql +/usr/lib/spark/bin/spark-submit +/usr/lib/spark/bin/utils.sh +/usr/lib/spark/conf +/usr/lib/spark/sbin /usr/lib/spark/work +/usr/lib/spark/examples +/usr/lib/spark/data +/usr/share/doc/spark /var/lib/spark/ /var/log/spark/ /var/run/spark/ diff --git a/bigtop-packages/src/rpm/spark/SPECS/spark.spec b/bigtop-packages/src/rpm/spark/SPECS/spark.spec index 81d658e8..d8a5c10c 100644 --- a/bigtop-packages/src/rpm/spark/SPECS/spark.spec +++ b/bigtop-packages/src/rpm/spark/SPECS/spark.spec @@ -23,7 +23,7 @@ %define config_spark %{etc_spark}/conf %define bin /usr/bin %define man_dir /usr/share/man -%define spark_services master worker +%define spark_services master worker history-server thriftserver %if %{?suse_version:1}0 %define doc_spark %{_docdir}/spark @@ -50,8 +50,10 @@ Source1: do-component-build Source2: install_%{spark_name}.sh Source3: spark-master.svc Source4: spark-worker.svc -Source5: compute-classpath.sh Source6: init.d.tmpl +Source7: spark-history-server.svc +Source8: spark-thriftserver.svc +Source9: bigtop.bom Requires: bigtop-utils >= 0.7, hadoop-client Requires(preun): /sbin/service @@ -101,6 +103,22 @@ Requires: spark-core = %{version}-%{release}, python %description -n spark-python Includes PySpark, an interactive Python shell for Spark, and related libraries +%package -n spark-history-server +Summary: History server for Apache Spark +Group: Development/Libraries +Requires: spark-core = %{version}-%{release} + +%description -n spark-history-server +History server for Apache Spark + +%package -n spark-thriftserver +Summary: Thrift server for Spark SQL +Group: Development/Libraries +Requires: spark-core = %{version}-%{release} + +%description -n spark-thriftserver +Thrift server for Spark SQL + %prep %setup -n %{spark_name}-%{spark_base_version} @@ -151,17 +169,22 @@ done 
%defattr(-,root,root,755) %config(noreplace) %{config_spark}.dist %doc %{doc_spark} -%{lib_spark} +%{lib_spark}/conf +%{lib_spark}/LICENSE +%{lib_spark}/RELEASE +%{lib_spark}/NOTICE %{lib_spark}/bin -%{lib_spark}/sbin %{lib_spark}/lib -%exclude %{lib_spark}/bin/pyspark +%{lib_spark}/sbin +%{lib_spark}/data +%{lib_spark}/examples +%{lib_spark}/work +%exclude %{bin_spark}/pyspark %exclude %{lib_spark}/python %{etc_spark} %attr(0755,spark,spark) %{var_lib_spark} %attr(0755,spark,spark) %{var_run_spark} %attr(0755,spark,spark) %{var_log_spark} -%attr(0755,root,root) %{bin_spark} %{bin}/spark-shell %{bin}/spark-executor %{bin}/spark-submit @@ -189,3 +212,5 @@ if [ $1 -ge 1 ]; then \ fi %service_macro spark-master %service_macro spark-worker +%service_macro spark-history-server +%service_macro spark-thriftserver diff --git a/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkExample.groovy b/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkExample.groovy new file mode 100644 index 00000000..02cd161f --- /dev/null +++ b/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkExample.groovy @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bigtop.itest.spark + +import org.apache.bigtop.itest.shell.Shell +import org.apache.hadoop.fs.FileSystem +import org.apache.hadoop.fs.Path + +import org.junit.Test +import org.junit.BeforeClass +import org.junit.AfterClass +import static org.junit.Assert.assertEquals +import static org.junit.Assert.assertTrue +import static org.junit.Assert.assertNotNull + +import static org.apache.bigtop.itest.LogErrorsUtils.logError + +public class TestSparkExample { + + private static String SPARK_HOME = System.getenv("SPARK_HOME"); + private static String SPARK_MASTER = System.getenv("SPARK_MASTER"); + static { + assertNotNull("SPARK_HOME has to be set to run this test", SPARK_HOME); + assertNotNull("SPARK_MASTER has to be set to run this test", SPARK_MASTER); + } + static final String SPARK_EXAMPLES_DIR = SPARK_HOME + "/examples"; + static final String sparkExamplesJarFile = "spark-examples.jar"; + static final String SPARK_EXAMPLES_JAR = SPARK_HOME + "/lib/" + sparkExamplesJarFile; + + static Shell sh = new Shell("/bin/bash -s"); + + @BeforeClass + static void setUp() { + + } + + @AfterClass + public static void tearDown() { + + } + + @Test + void testSparkExample() { + def examples = ["SparkPi", "JavaSparkPi"]; + examples.each() { + String exampleClass = "org.apache.spark.examples.${it}" + sh.exec("cd ${SPARK_HOME} && ./bin/spark-submit --class " + exampleClass + " --master ${SPARK_MASTER} " + SPARK_EXAMPLES_JAR); + logError(sh); + assertTrue("Running Spark example ${it} failed", sh.getRet() == 0); + } + } + + @Test + void testSparkPythonExample() { + def pyExamples = ["pi.py"]; + pyExamples.each() { + sh.exec("cd ${SPARK_HOME} && ./bin/spark-submit --master ${SPARK_MASTER} " + SPARK_EXAMPLES_DIR + "/src/main/python/${it}"); + logError(sh); + assertTrue("Running Spark Python example {it} failed", sh.getRet() == 0); + } + } + +} diff --git 
a/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkSmoke.groovy b/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkSmoke.groovy index 4fcb67e9..4c601a0b 100644 --- a/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkSmoke.groovy +++ b/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkSmoke.groovy @@ -40,7 +40,7 @@ public class TestSparkSmoke implements Serializable { private static String pwd = "" private static Configuration conf static Shell sh = new Shell("/bin/bash -s") - def result = ["0.2: 3", "0.1: 3", "0.0: 3", "9.0: 3", "9.2: 3", "9.1: 3"] + def result = ["9.1: 3", "9.2: 3", "0.2: 3", "9.0: 3", "0.0: 3", "0.1: 3"] @BeforeClass static void setUp() { @@ -53,7 +53,7 @@ public class TestSparkSmoke implements Serializable { @Test void ShellTest() { String kmeans = "file://" + pwd + "/kmeans_data.txt" - sh.exec("cd ${SPARK_HOME} && ./spark-class org.apache.spark.examples.JavaWordCount local " + kmeans) + sh.exec("cd ${SPARK_HOME} && ./bin/spark-submit --class org.apache.spark.examples.JavaWordCount --master local lib/spark-examples.jar " + kmeans) logError(sh) assertEquals(result, sh.out) } @@ -68,7 +68,7 @@ public class TestSparkSmoke implements Serializable { fs.close() String dfsname = fs_default_name + pathname - sh.exec("cd ${SPARK_HOME} && ./spark-class org.apache.spark.examples.JavaWordCount ${SPARK_MASTER} " + dfsname) + sh.exec("cd ${SPARK_HOME} && ./bin/spark-submit --class org.apache.spark.examples.JavaWordCount --master ${SPARK_MASTER} lib/spark-examples.jar " + dfsname) logError(sh) assertEquals(result, sh.out) } diff --git a/bigtop-tests/test-artifacts/spark/src/main/resources/README.md b/bigtop-tests/test-artifacts/spark/src/main/resources/README.md deleted file mode 100644 index d042d7e9..00000000 --- a/bigtop-tests/test-artifacts/spark/src/main/resources/README.md +++ /dev/null @@ -1,73 
+0,0 @@ -# Spark - -Lightning-Fast Cluster Computing - <http://spark.incubator.apache.org/> - - -## Online Documentation - -You can find the latest Spark documentation, including a programming -guide, on the project webpage at <http://spark.incubator.apache.org/documentation.html>. -This README file only contains basic setup instructions. - - -## Building - -Spark requires Scala 2.9.2 (Scala 2.10 is not yet supported). The project is -built using Simple Build Tool (SBT), which is packaged with it. To build -Spark and its example programs, run: - - sbt/sbt package - -Spark also supports building using Maven. If you would like to build using Maven, -see the [instructions for building Spark with Maven](http://spark.incubator.apache.org/docs/latest/building-with-maven.html) -in the spark documentation.. - -To run Spark, you will need to have Scala's bin directory in your `PATH`, or -you will need to set the `SCALA_HOME` environment variable to point to where -you've installed Scala. Scala must be accessible through one of these -methods on your cluster's worker nodes as well as its master. - -To run one of the examples, use `./run <class> <params>`. For example: - - ./run spark.examples.SparkLR local[2] - -will run the Logistic Regression example locally on 2 CPUs. - -Each of the example programs prints usage help if no params are given. - -All of the Spark samples take a `<host>` parameter that is the cluster URL -to connect to. This can be a mesos:// or spark:// URL, or "local" to run -locally with one thread, or "local[N]" to run locally with N threads. - - -## A Note About Hadoop Versions - -Spark uses the Hadoop core library to talk to HDFS and other Hadoop-supported -storage systems. Because the HDFS API has changed in different versions of -Hadoop, you must build Spark against the same version that your cluster runs. -You can change the version by setting the `HADOOP_VERSION` variable at the top -of `project/SparkBuild.scala`, then rebuilding Spark. 
- - -## Configuration - -Please refer to the "Configuration" guide in the online documentation for a -full overview on how to configure Spark. At the minimum, you will need to -create a `conf/spark-env.sh` script (copy `conf/spark-env.sh.template`) and -set the following two variables: - -- `SCALA_HOME`: Location where Scala is installed. - -- `MESOS_NATIVE_LIBRARY`: Your Mesos library (only needed if you want to run - on Mesos). For example, this might be `/usr/local/lib/libmesos.so` on Linux. - - -## Contributing to Spark - -Contributions via GitHub pull requests are gladly accepted from their original -author. Along with any pull requests, please state that the contribution is -your original work and that you license the work to the project under the -project's open source license. Whether or not you state this explicitly, by -submitting any copyrighted material via pull request, email, or other means -you agree to license the material under the project's open source license and -warrant that you have the legal authority to do so. 
diff --git a/bigtop-tests/test-artifacts/spark/src/main/resources/kmeans_data.txt b/bigtop-tests/test-artifacts/spark/src/main/resources/kmeans_data.txt deleted file mode 100644 index 338664f7..00000000 --- a/bigtop-tests/test-artifacts/spark/src/main/resources/kmeans_data.txt +++ /dev/null @@ -1,6 +0,0 @@ -0.0 0.0 0.0 -0.1 0.1 0.1 -0.2 0.2 0.2 -9.0 9.0 9.0 -9.1 9.1 9.1 -9.2 9.2 9.2 @@ -247,8 +247,8 @@ $(eval $(call PACKAGE,crunch,CRUNCH)) SPARK_NAME=spark SPARK_RELNOTES_NAME=Spark SPARK_PKG_NAME=spark-core -SPARK_BASE_VERSION=1.1.0 -SPARK_PKG_VERSION=1.1.0 +SPARK_BASE_VERSION=1.2.1 +SPARK_PKG_VERSION=1.2.1 SPARK_RELEASE_VERSION=1 SPARK_TARBALL_DST=spark-$(SPARK_BASE_VERSION).tar.gz SPARK_TARBALL_SRC=spark-$(SPARK_BASE_VERSION).tgz @@ -48,7 +48,7 @@ <zookeeper.version>3.4.5</zookeeper.version> <giraph.version>1.0.0</giraph.version> <solr.version>4.6.0</solr.version> - <spark.version>0.9.1</spark.version> + <spark.version>1.2.1</spark.version> <kafka.version>0.8.1.1</kafka.version> <phoenix.version>4.2.2</phoenix.version> <spark-smoke.version>${project.version}</spark-smoke.version> |