-rw-r--r--  bigtop-packages/src/common/hadoop/init-hcfs.json                                                        |  3
-rwxr-xr-x  bigtop-packages/src/common/hadoop/init-hdfs.sh                                                          |  5
-rw-r--r--  bigtop-packages/src/common/spark/compute-classpath.sh                                                   | 74
-rw-r--r--  bigtop-packages/src/common/spark/do-component-build                                                     |  5
-rw-r--r--  bigtop-packages/src/common/spark/install_spark.sh                                                       | 30
-rw-r--r--  bigtop-packages/src/common/spark/spark-history-server.svc                                               | 70
-rw-r--r--  bigtop-packages/src/common/spark/spark-thriftserver.svc                                                 | 72
-rw-r--r--  bigtop-packages/src/deb/spark/control                                                                   | 14
-rw-r--r--  bigtop-packages/src/deb/spark/rules                                                                     | 10
-rw-r--r--  bigtop-packages/src/deb/spark/spark-core.install                                                        | 22
-rw-r--r--  bigtop-packages/src/rpm/spark/SPECS/spark.spec                                                          | 37
-rw-r--r--  bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkExample.groovy | 79
-rw-r--r--  bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkSmoke.groovy   |  6
-rw-r--r--  bigtop-tests/test-artifacts/spark/src/main/resources/README.md                                          | 73
-rw-r--r--  bigtop-tests/test-artifacts/spark/src/main/resources/kmeans_data.txt                                    |  6
-rw-r--r--  bigtop.mk                                                                                               |  4
-rw-r--r--  pom.xml                                                                                                 |  2
17 files changed, 321 insertions(+), 191 deletions(-)
diff --git a/bigtop-packages/src/common/hadoop/init-hcfs.json b/bigtop-packages/src/common/hadoop/init-hcfs.json
index d8825aa3..bd97a220 100644
--- a/bigtop-packages/src/common/hadoop/init-hcfs.json
+++ b/bigtop-packages/src/common/hadoop/init-hcfs.json
@@ -86,7 +86,8 @@
["/user/oozie/share/lib/hive", null, null, null],
["/user/oozie/share/lib/mapreduce-streaming", null, null, null],
["/user/oozie/share/lib/distcp", null, null, null],
- ["/user/oozie/share/lib/pig", null, null, null]
+ ["/user/oozie/share/lib/pig", null, null, null],
+ ["/var/log/spark/apps","1777","spark","spark"]
],
"user": [
["tom", "0755", null],
diff --git a/bigtop-packages/src/common/hadoop/init-hdfs.sh b/bigtop-packages/src/common/hadoop/init-hdfs.sh
index 3a5fe361..1bf820f6 100755
--- a/bigtop-packages/src/common/hadoop/init-hdfs.sh
+++ b/bigtop-packages/src/common/hadoop/init-hdfs.sh
@@ -69,6 +69,11 @@ su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /user/oozie/share/lib/hive'
su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /user/oozie/share/lib/mapreduce-streaming'
su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /user/oozie/share/lib/distcp'
su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /user/oozie/share/lib/pig'
+# Event log directory for Apache Spark
+su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir -p /var/log/spark/apps'
+su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chmod -R 1777 /var/log/spark/apps'
+su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chown spark:spark /var/log/spark/apps'
+
# Copy over files from local filesystem to HDFS that oozie might need
if ls /usr/lib/hive/lib/*.jar &> /dev/null; then
su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -put /usr/lib/hive/lib/*.jar /user/oozie/share/lib/hive'
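The three new commands provision the history server's event-log directory; mode 1777 (sticky) lets any job write its own application logs while keeping users from deleting each other's. A quick post-init sanity check (a sketch, assuming a running HDFS and the spark user created by these packages):

    su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -ls -d /var/log/spark/apps'
    # expected owner/mode after the chmod/chown above: drwxrwxrwt  spark spark
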
diff --git a/bigtop-packages/src/common/spark/compute-classpath.sh b/bigtop-packages/src/common/spark/compute-classpath.sh
deleted file mode 100644
index eb6a8076..00000000
--- a/bigtop-packages/src/common/spark/compute-classpath.sh
+++ /dev/null
@@ -1,74 +0,0 @@
-#!/bin/bash
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# This script computes Spark's classpath and prints it to stdout; it's used by both the "run"
-# script and the ExecutorRunner in standalone cluster mode.
-
-SCALA_VERSION=2.10
-
-# Figure out where Spark is installed
-FWDIR="$(cd `dirname $0`/..; pwd)"
-
-# Load environment variables from conf/spark-env.sh, if it exists
-if [ -e $FWDIR/conf/spark-env.sh ] ; then
- . $FWDIR/conf/spark-env.sh
-fi
-
-CORE_DIR="$FWDIR/core"
-ASSEMBLY_DIR="$FWDIR/lib"
-PYSPARK_DIR="$FWDIR/python"
-
-# Build up classpath
-CLASSPATH="$SPARK_CLASSPATH"
-CLASSPATH="$CLASSPATH:$FWDIR/conf"
-CLASSPATH="$CLASSPATH:$ASSEMBLY_DIR/*"
-if [ -e "$PYSPARK_DIR" ]; then
- for jar in `find $PYSPARK_DIR/lib -name '*jar'`; do
- CLASSPATH="$CLASSPATH:$jar"
- done
-fi
-
-# Add hadoop conf dir - else FileSystem.*, etc fail !
-# Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts
-# the configuration files.
-
-export DEFAULT_HADOOP=/usr/lib/hadoop
-export DEFAULT_HADOOP_CONF=/etc/hadoop/conf
-export HADOOP_HOME=${HADOOP_HOME:-$DEFAULT_HADOOP}
-export HADOOP_HDFS_HOME=${HADOOP_HDFS_HOME:-${HADOOP_HOME}/../hadoop-hdfs}
-export HADOOP_MAPRED_HOME=${HADOOP_MAPRED_HOME:-${HADOOP_HOME}/../hadoop-mapreduce}
-export HADOOP_YARN_HOME=${HADOOP_YARN_HOME:-${HADOOP_HOME}/../hadoop-yarn}
-export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-$DEFAULT_HADOOP_CONF}
-
-CLASSPATH="$CLASSPATH:$HADOOP_CONF_DIR"
-if [ "x" != "x$YARN_CONF_DIR" ]; then
- CLASSPATH="$CLASSPATH:$YARN_CONF_DIR"
-fi
-# Let's make sure that all needed hadoop libs are added properly
-CLASSPATH="$CLASSPATH:$HADOOP_HOME/*:$HADOOP_HDFS_HOME/*:$HADOOP_YARN_HOME/*:$HADOOP_MAPRED_HOME/*"
-# Add Scala standard library
-if [ -z "$SCALA_LIBRARY_PATH" ]; then
- if [ -z "$SCALA_HOME" ]; then
- echo "SCALA_HOME is not set" >&2
- exit 1
- fi
- SCALA_LIBRARY_PATH="$SCALA_HOME/lib"
-fi
-CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/scala-library.jar"
-CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/scala-compiler.jar"
-CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/jline.jar"
-
-echo "$CLASSPATH"
diff --git a/bigtop-packages/src/common/spark/do-component-build b/bigtop-packages/src/common/spark/do-component-build
index 5327c35d..deecbe2e 100644
--- a/bigtop-packages/src/common/spark/do-component-build
+++ b/bigtop-packages/src/common/spark/do-component-build
@@ -23,9 +23,6 @@ if [ "x$SCALA_HOME" = "x" ]; then
exit 2
fi
-# FIXME: this is fixed in Spark 1.x
-sed -i -e '/<dependencies>/a<dependency><groupId>commons-cli</groupId><artifactId>commons-cli</artifactId><version>1.2</version></dependency>' assembly/pom.xml
-
BUILD_OPTS="-Divy.home=${HOME}/.ivy2 -Dsbt.ivy.home=${HOME}/.ivy2 -Duser.home=${HOME} \
-Drepo.maven.org=$IVY_MIRROR_PROP \
-Dreactor.repo=file://${HOME}/.m2/repository \
@@ -37,4 +34,4 @@ BUILD_OPTS="-Divy.home=${HOME}/.ivy2 -Dsbt.ivy.home=${HOME}/.ivy2 -Duser.home=${
# http://maven.40175.n5.nabble.com/Not-finding-artifact-in-local-repo-td3727753.html
export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m -XX:PermSize=1024m -XX:MaxPermSize=1024m"
-mvn -Pbigtop-dist -Pyarn -Phive $BUILD_OPTS install
+mvn -Pbigtop-dist -Pyarn -Phive -Phive-thriftserver $BUILD_OPTS install
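The added -Phive-thriftserver profile compiles HiveThriftServer2 into the Spark assembly, which the new spark-thriftserver service below needs at runtime. One way to confirm the profile took effect after a build (a hedged check; the assembly jar name varies with the Hadoop version):

    jar tf assembly/target/spark-assembly-*.jar | grep HiveThriftServer2
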
diff --git a/bigtop-packages/src/common/spark/install_spark.sh b/bigtop-packages/src/common/spark/install_spark.sh
index 3dfd2f6e..e28edd3b 100644
--- a/bigtop-packages/src/common/spark/install_spark.sh
+++ b/bigtop-packages/src/common/spark/install_spark.sh
@@ -125,6 +125,7 @@ install -d -m 0755 $PREFIX/$LIB_DIR/lib
install -d -m 0755 $PREFIX/$LIB_DIR/bin
install -d -m 0755 $PREFIX/$LIB_DIR/sbin
install -d -m 0755 $PREFIX/$DOC_DIR
+install -d -m 0755 $PREFIX/$EXAMPLES_DIR
install -d -m 0755 $PREFIX/var/lib/spark/
install -d -m 0755 $PREFIX/var/log/spark/
@@ -139,19 +140,18 @@ tar --wildcards -C $PREFIX/$LIB_DIR/ -zxf ${BUILD_DIR}/assembly/target/spark-ass
rm -rf $PREFIX/$LIB_DIR/bin/*.cmd
# Examples jar
-cp ${BUILD_DIR}/examples/target/spark-examples*${SPARK_VERSION}.jar $PREFIX/$LIB_DIR/lib
-sed -i -e "s|lib/spark-examples-\*hadoop\*.jar|lib/spark-examples_\*.jar|" $PREFIX/$LIB_DIR/bin/run-example
+cp ${BUILD_DIR}/examples/target/spark-examples*${SPARK_VERSION}.jar $PREFIX/$LIB_DIR/lib/spark-examples-${SPARK_VERSION}-hadoop${HADOOP_VERSION}.jar
# Examples src
-install -d -m 0755 $PREFIX/$EXAMPLES_DIR
cp -ra ${BUILD_DIR}/examples/src $PREFIX/$EXAMPLES_DIR/
+ln -s $EXAMPLES_DIR $PREFIX/$LIB_DIR/examples
+
+# Data
+cp -ra ${BUILD_DIR}/data $PREFIX/$LIB_DIR/
chmod 755 $PREFIX/$LIB_DIR/bin/*
chmod 755 $PREFIX/$LIB_DIR/sbin/*
-cp -a ${SOURCE_DIR}/compute-classpath.sh $PREFIX/$LIB_DIR/bin/
-chmod 755 $PREFIX/$LIB_DIR/bin/compute-classpath.sh
-
# Copy in the configuration files
install -d -m 0755 $PREFIX/$CONF_DIR
cp -a ${BUILD_DIR}/conf/* $PREFIX/$CONF_DIR
@@ -162,7 +162,7 @@ ln -s /etc/spark/conf $PREFIX/$LIB_DIR/conf
install -d -m 0755 $PREFIX/$BIN_DIR
for wrap in sbin/spark-executor bin/spark-shell bin/spark-submit; do
cat > $PREFIX/$BIN_DIR/`basename $wrap` <<EOF
-#!/bin/bash
+#!/bin/bash
# Autodetect JAVA_HOME if not defined
. /usr/lib/bigtop-utils/bigtop-detect-javahome
@@ -173,6 +173,7 @@ EOF
done
cat >> $PREFIX/$CONF_DIR/spark-env.sh <<EOF
+export SPARK_SCALA_VERSION=2.10
### Let's run everything with JVM runtime, instead of Scala
export SPARK_LAUNCH_WITH_SCALA=0
@@ -184,6 +185,16 @@ export SPARK_WORKER_PORT=7078
export SPARK_WORKER_WEBUI_PORT=18081
export SPARK_WORKER_DIR=/var/run/spark/work
export SPARK_LOG_DIR=/var/log/spark
+export SPARK_HISTORY_OPTS="\$SPARK_HISTORY_OPTS -Dspark.history.fs.logDirectory=hdfs:///var/log/spark/apps -Dspark.history.ui.port=18082"
+
+export HADOOP_HOME=\${HADOOP_HOME:-/usr/lib/hadoop}
+export HADOOP_HDFS_HOME=\${HADOOP_HDFS_HOME:-\${HADOOP_HOME}/../hadoop-hdfs}
+export HADOOP_MAPRED_HOME=\${HADOOP_MAPRED_HOME:-\${HADOOP_HOME}/../hadoop-mapreduce}
+export HADOOP_YARN_HOME=\${HADOOP_YARN_HOME:-\${HADOOP_HOME}/../hadoop-yarn}
+export HADOOP_CONF_DIR=\${HADOOP_CONF_DIR:-/etc/hadoop/conf}
+
+# Let's make sure that all needed hadoop libs are added properly
+CLASSPATH="\$CLASSPATH:\$HADOOP_HOME/*:\$HADOOP_HDFS_HOME/*:\$HADOOP_YARN_HOME/*:\$HADOOP_MAPRED_HOME/*"
if [ -n "\$HADOOP_HOME" ]; then
export SPARK_LIBRARY_PATH=\$SPARK_LIBRARY_PATH:\${HADOOP_HOME}/lib/native
@@ -201,6 +212,7 @@ EOF
ln -s /var/run/spark/work $PREFIX/$LIB_DIR/work
cp -r ${BUILD_DIR}/python ${PREFIX}/${INSTALLED_LIB_DIR}/
+rm -f ${PREFIX}/${INSTALLED_LIB_DIR}/python/.gitignore
cat > $PREFIX/$BIN_DIR/pyspark <<EOF
#!/bin/bash
@@ -214,5 +226,7 @@ EOF
chmod 755 $PREFIX/$BIN_DIR/pyspark
touch $PREFIX/$LIB_DIR/RELEASE
-
cp ${BUILD_DIR}/{LICENSE,NOTICE} ${PREFIX}/${LIB_DIR}/
+
+# Version-less symlinks
+(cd $PREFIX/$LIB_DIR/lib; ln -s spark-assembly*.jar spark-assembly.jar; ln -s spark-examples*.jar spark-examples.jar)
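The version-less symlinks give tests and wrapper scripts a stable path regardless of which Spark and Hadoop versions are baked into the jar names. The installed layout should end up roughly like this (a sketch; exact version strings depend on the build):

    ls -l /usr/lib/spark/lib/
    # spark-assembly.jar -> spark-assembly-1.2.1-hadoop<N>.jar
    # spark-examples.jar -> spark-examples-1.2.1-hadoop<N>.jar
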
diff --git a/bigtop-packages/src/common/spark/spark-history-server.svc b/bigtop-packages/src/common/spark/spark-history-server.svc
new file mode 100644
index 00000000..0410d160
--- /dev/null
+++ b/bigtop-packages/src/common/spark/spark-history-server.svc
@@ -0,0 +1,70 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+TYPE="history-server"
+DAEMON="spark-${TYPE}"
+DESC="Spark ${TYPE}"
+EXEC_PATH="/usr/lib/spark/bin/spark-class"
+SVC_USER="spark"
+WORKING_DIR="/var/lib/spark"
+DAEMON_FLAGS=""
+CONF_DIR="/etc/spark/conf"
+PIDFILE="/var/run/spark/${DAEMON}.pid"
+
+generate_start() {
+
+cat <<'__EOT__'
+start() {
+ [ -x $EXE_FILE ] || exit $ERROR_PROGRAM_NOT_INSTALLED
+ log_success_msg "Starting $DESC (${DAEMON}): "
+
+ checkstatusofproc
+ status=$?
+ if [ "$status" -eq "$STATUS_RUNNING" ]; then
+ log_success_msg "${DESC} is running"
+ exit 0
+ fi
+
+ LOG_FILE=/var/log/spark/${DAEMON}.out
+
+ su -s /bin/bash $SVC_USER -c "nohup nice -n 0 \
+ ${EXEC_PATH} org.apache.spark.deploy.history.HistoryServer $DAEMON_FLAGS \
+ > $LOG_FILE 2>&1 & "'echo $!' > "$PIDFILE"
+
+ sleep 3
+
+ checkstatusofproc
+ RETVAL=$?
+ [ $RETVAL -eq $STATUS_RUNNING ] && touch $LOCKFILE
+ return $RETVAL
+}
+__EOT__
+
+}
+
+generate_stop() {
+
+cat <<'__EOT__'
+stop() {
+ log_success_msg "Stopping $DESC (${DAEMON}): "
+ killproc -p $PIDFILE java
+ RETVAL=$?
+
+ [ $RETVAL -eq $RETVAL_SUCCESS ] && rm -f $LOCKFILE $PIDFILE
+ return $RETVAL
+}
+__EOT__
+
+}
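generate_start and generate_stop are spliced into a full init script by init.d.tmpl (listed as Source6 in the spec below). Stripped of the init plumbing, the start() body boils down to one command (same class and paths as above):

    su -s /bin/bash spark -c "nohup /usr/lib/spark/bin/spark-class \
        org.apache.spark.deploy.history.HistoryServer \
        > /var/log/spark/spark-history-server.out 2>&1 &"
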
diff --git a/bigtop-packages/src/common/spark/spark-thriftserver.svc b/bigtop-packages/src/common/spark/spark-thriftserver.svc
new file mode 100644
index 00000000..4eba4a4a
--- /dev/null
+++ b/bigtop-packages/src/common/spark/spark-thriftserver.svc
@@ -0,0 +1,72 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+TYPE="thriftserver"
+DAEMON="spark-${TYPE}"
+DESC="Spark ${TYPE}"
+EXEC_PATH="/usr/lib/spark/sbin/spark-daemon.sh"
+SVC_USER="spark"
+WORKING_DIR="/var/lib/spark"
+DAEMON_FLAGS=""
+CONF_DIR="/etc/spark/conf"
+PIDFILE="/var/run/spark/${DAEMON}.pid"
+
+generate_start() {
+
+cat <<'__EOT__'
+start() {
+ [ -x $EXE_FILE ] || exit $ERROR_PROGRAM_NOT_INSTALLED
+ log_success_msg "Starting $DESC (${DAEMON}): "
+
+ checkstatusofproc
+ status=$?
+ if [ "$status" -eq "$STATUS_RUNNING" ]; then
+ log_success_msg "${DESC} is running"
+ exit 0
+ fi
+
+ function usage {
+ echo
+ }
+ export SUBMIT_USAGE_FUNCTION=usage
+
+ su -s /bin/bash $SVC_USER -c " \
+ ${EXEC_PATH} spark-submit org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 1 $DAEMON_FLAGS"
+
+ sleep 3
+
+ checkstatusofproc
+ RETVAL=$?
+ [ $RETVAL -eq $STATUS_RUNNING ] && touch $LOCKFILE
+ return $RETVAL
+}
+__EOT__
+
+}
+
+generate_stop() {
+
+cat <<'__EOT__'
+stop() {
+ su -s /bin/bash $SVC_USER -c " \
+ ${EXEC_PATH} stop org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 1"
+ RETVAL=$?
+
+ [ $RETVAL -eq $RETVAL_SUCCESS ] && rm -f $LOCKFILE $PIDFILE
+ return $RETVAL
+}
+__EOT__
+
+}
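Unlike the history server, this service hands lifecycle management to Spark's own spark-daemon.sh, which is why stop() invokes the same script instead of killproc. Once the daemon is up, a minimal smoke test might look like this (a sketch: 10000 is HiveServer2's conventional default port, not something this patch sets):

    /usr/lib/spark/bin/beeline -u jdbc:hive2://localhost:10000 -e 'SHOW TABLES;'
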
diff --git a/bigtop-packages/src/deb/spark/control b/bigtop-packages/src/deb/spark/control
index ae2f07eb..856d622e 100644
--- a/bigtop-packages/src/deb/spark/control
+++ b/bigtop-packages/src/deb/spark/control
@@ -23,7 +23,7 @@ Homepage: http://spark.apache.org/
Package: spark-core
Architecture: all
-Depends: bigtop-utils (>= 0.7), hadoop-client
+Depends: adduser, bigtop-utils (>= 0.7), hadoop-client
Description: Lightning-Fast Cluster Computing
Spark is a MapReduce-like cluster computing framework designed to support
low-latency iterative jobs and interactive use from an interpreter. It is
@@ -48,3 +48,15 @@ Architecture: all
Depends: spark-core (= ${source:Version}), python
Description: Python client for Spark
Includes PySpark, an interactive Python shell for Spark, and related libraries
+
+Package: spark-history-server
+Architecture: all
+Depends: spark-core (= ${source:Version})
+Description: History server for Apache Spark
+ History server for Apache Spark
+
+Package: spark-thriftserver
+Architecture: all
+Depends: spark-core (= ${source:Version})
+Description: Thrift server for Spark SQL
+ Thrift server for Spark SQL
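With these control stanzas, the new daemons install like any other package on Debian-family systems (hypothetical session; service names match the init scripts generated from the rules file below):

    sudo apt-get install spark-history-server spark-thriftserver
    sudo service spark-history-server start
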
diff --git a/bigtop-packages/src/deb/spark/rules b/bigtop-packages/src/deb/spark/rules
index a21ba820..58815189 100644
--- a/bigtop-packages/src/deb/spark/rules
+++ b/bigtop-packages/src/deb/spark/rules
@@ -28,7 +28,7 @@ export DH_OPTIONS
override_dh_auto_build:
bash debian/do-component-build
-svcs=spark-master spark-worker
+svcs=spark-master spark-worker spark-history-server spark-thriftserver
$(svcs): debian/init.d.tmpl
bash $< debian/$@.svc deb debian/$@.init
@@ -36,7 +36,7 @@ $(svcs): debian/init.d.tmpl
override_dh_auto_install: $(svcs)
bash -x debian/install_spark.sh \
- --build-dir=`pwd` \
- --doc-dir=/usr/share/doc/spark \
- --source-dir=debian \
- --prefix=debian/tmp
+ --build-dir=`pwd` \
+ --doc-dir=/usr/share/doc/spark \
+ --source-dir=debian \
+ --prefix=debian/tmp
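The $(svcs) pattern rule runs init.d.tmpl once per service name, so the two new entries effectively expand to:

    bash debian/init.d.tmpl debian/spark-history-server.svc deb debian/spark-history-server.init
    bash debian/init.d.tmpl debian/spark-thriftserver.svc deb debian/spark-thriftserver.init
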
diff --git a/bigtop-packages/src/deb/spark/spark-core.install b/bigtop-packages/src/deb/spark/spark-core.install
index d1efdfdb..77a6bd8b 100644
--- a/bigtop-packages/src/deb/spark/spark-core.install
+++ b/bigtop-packages/src/deb/spark/spark-core.install
@@ -2,16 +2,24 @@
/usr/bin/spark-executor
/usr/bin/spark-submit
/usr/bin/spark-shell
-/usr/lib/spark/bin/
-/usr/lib/spark/lib
-/usr/lib/spark/sbin
-/usr/lib/spark/ui-resources
-/usr/lib/spark/conf
/usr/lib/spark/LICENSE
-/usr/lib/spark/NOTICE
/usr/lib/spark/RELEASE
-/usr/share/doc/spark
+/usr/lib/spark/NOTICE
+/usr/lib/spark/bin/beeline
+/usr/lib/spark/bin/compute-classpath.sh
+/usr/lib/spark/bin/load-spark-env.sh
+/usr/lib/spark/bin/run-example
+/usr/lib/spark/bin/spark-class
+/usr/lib/spark/bin/spark-shell
+/usr/lib/spark/bin/spark-sql
+/usr/lib/spark/bin/spark-submit
+/usr/lib/spark/bin/utils.sh
+/usr/lib/spark/conf
+/usr/lib/spark/sbin
/usr/lib/spark/work
+/usr/lib/spark/examples
+/usr/lib/spark/data
+/usr/share/doc/spark
/var/lib/spark/
/var/log/spark/
/var/run/spark/
diff --git a/bigtop-packages/src/rpm/spark/SPECS/spark.spec b/bigtop-packages/src/rpm/spark/SPECS/spark.spec
index 81d658e8..d8a5c10c 100644
--- a/bigtop-packages/src/rpm/spark/SPECS/spark.spec
+++ b/bigtop-packages/src/rpm/spark/SPECS/spark.spec
@@ -23,7 +23,7 @@
%define config_spark %{etc_spark}/conf
%define bin /usr/bin
%define man_dir /usr/share/man
-%define spark_services master worker
+%define spark_services master worker history-server thriftserver
%if %{?suse_version:1}0
%define doc_spark %{_docdir}/spark
@@ -50,8 +50,10 @@ Source1: do-component-build
Source2: install_%{spark_name}.sh
Source3: spark-master.svc
Source4: spark-worker.svc
-Source5: compute-classpath.sh
Source6: init.d.tmpl
+Source7: spark-history-server.svc
+Source8: spark-thriftserver.svc
+Source9: bigtop.bom
Requires: bigtop-utils >= 0.7, hadoop-client
Requires(preun): /sbin/service
@@ -101,6 +103,22 @@ Requires: spark-core = %{version}-%{release}, python
%description -n spark-python
Includes PySpark, an interactive Python shell for Spark, and related libraries
+%package -n spark-history-server
+Summary: History server for Apache Spark
+Group: Development/Libraries
+Requires: spark-core = %{version}-%{release}
+
+%description -n spark-history-server
+History server for Apache Spark
+
+%package -n spark-thriftserver
+Summary: Thrift server for Spark SQL
+Group: Development/Libraries
+Requires: spark-core = %{version}-%{release}
+
+%description -n spark-thriftserver
+Thrift server for Spark SQL
+
%prep
%setup -n %{spark_name}-%{spark_base_version}
@@ -151,17 +169,22 @@ done
%defattr(-,root,root,755)
%config(noreplace) %{config_spark}.dist
%doc %{doc_spark}
-%{lib_spark}
+%{lib_spark}/conf
+%{lib_spark}/LICENSE
+%{lib_spark}/RELEASE
+%{lib_spark}/NOTICE
%{lib_spark}/bin
-%{lib_spark}/sbin
%{lib_spark}/lib
-%exclude %{lib_spark}/bin/pyspark
+%{lib_spark}/sbin
+%{lib_spark}/data
+%{lib_spark}/examples
+%{lib_spark}/work
+%exclude %{bin_spark}/pyspark
%exclude %{lib_spark}/python
%{etc_spark}
%attr(0755,spark,spark) %{var_lib_spark}
%attr(0755,spark,spark) %{var_run_spark}
%attr(0755,spark,spark) %{var_log_spark}
-%attr(0755,root,root) %{bin_spark}
%{bin}/spark-shell
%{bin}/spark-executor
%{bin}/spark-submit
@@ -189,3 +212,5 @@ if [ $1 -ge 1 ]; then \
fi
%service_macro spark-master
%service_macro spark-worker
+%service_macro spark-history-server
+%service_macro spark-thriftserver
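%service_macro wires up the init script and the chkconfig/condrestart scriptlets for each listed service. After an rpmbuild, each new subpackage should ship its own init script (a sketch; the exact init.d path differs across distros):

    rpm -qlp spark-history-server-*.rpm | grep init.d
    # e.g. /etc/rc.d/init.d/spark-history-server
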
diff --git a/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkExample.groovy b/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkExample.groovy
new file mode 100644
index 00000000..02cd161f
--- /dev/null
+++ b/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkExample.groovy
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bigtop.itest.spark
+
+import org.apache.bigtop.itest.shell.Shell
+import org.apache.hadoop.fs.FileSystem
+import org.apache.hadoop.fs.Path
+
+import org.junit.Test
+import org.junit.BeforeClass
+import org.junit.AfterClass
+import static org.junit.Assert.assertEquals
+import static org.junit.Assert.assertTrue
+import static org.junit.Assert.assertNotNull
+
+import static org.apache.bigtop.itest.LogErrorsUtils.logError
+
+public class TestSparkExample {
+
+ private static String SPARK_HOME = System.getenv("SPARK_HOME");
+ private static String SPARK_MASTER = System.getenv("SPARK_MASTER");
+ static {
+ assertNotNull("SPARK_HOME has to be set to run this test", SPARK_HOME);
+ assertNotNull("SPARK_MASTER has to be set to run this test", SPARK_MASTER);
+ }
+ static final String SPARK_EXAMPLES_DIR = SPARK_HOME + "/examples";
+ static final String sparkExamplesJarFile = "spark-examples.jar";
+ static final String SPARK_EXAMPLES_JAR = SPARK_HOME + "/lib/" + sparkExamplesJarFile;
+
+ static Shell sh = new Shell("/bin/bash -s");
+
+ @BeforeClass
+ static void setUp() {
+
+ }
+
+ @AfterClass
+ public static void tearDown() {
+
+ }
+
+ @Test
+ void testSparkExample() {
+ def examples = ["SparkPi", "JavaSparkPi"];
+ examples.each() {
+ String exampleClass = "org.apache.spark.examples.${it}"
+ sh.exec("cd ${SPARK_HOME} && ./bin/spark-submit --class " + exampleClass + " --master ${SPARK_MASTER} " + SPARK_EXAMPLES_JAR);
+ logError(sh);
+ assertTrue("Running Spark example ${it} failed", sh.getRet() == 0);
+ }
+ }
+
+ @Test
+ void testSparkPythonExample() {
+ def pyExamples = ["pi.py"];
+ pyExamples.each() {
+ sh.exec("cd ${SPARK_HOME} && ./bin/spark-submit --master ${SPARK_MASTER} " + SPARK_EXAMPLES_DIR + "/src/main/python/${it}");
+ logError(sh);
+ assertTrue("Running Spark Python example ${it} failed", sh.getRet() == 0);
+ }
+ }
+
+}
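The static block makes SPARK_HOME and SPARK_MASTER hard preconditions for this test. Reproducing one iteration of testSparkExample() by hand looks like this (a sketch; the master URL is whatever your cluster advertises):

    export SPARK_HOME=/usr/lib/spark
    export SPARK_MASTER=spark://$(hostname):7077
    cd "$SPARK_HOME" && ./bin/spark-submit \
        --class org.apache.spark.examples.SparkPi \
        --master "$SPARK_MASTER" lib/spark-examples.jar
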
diff --git a/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkSmoke.groovy b/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkSmoke.groovy
index 4fcb67e9..4c601a0b 100644
--- a/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkSmoke.groovy
+++ b/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkSmoke.groovy
@@ -40,7 +40,7 @@ public class TestSparkSmoke implements Serializable {
private static String pwd = ""
private static Configuration conf
static Shell sh = new Shell("/bin/bash -s")
- def result = ["0.2: 3", "0.1: 3", "0.0: 3", "9.0: 3", "9.2: 3", "9.1: 3"]
+ def result = ["9.1: 3", "9.2: 3", "0.2: 3", "9.0: 3", "0.0: 3", "0.1: 3"]
@BeforeClass
static void setUp() {
@@ -53,7 +53,7 @@ public class TestSparkSmoke implements Serializable {
@Test
void ShellTest() {
String kmeans = "file://" + pwd + "/kmeans_data.txt"
- sh.exec("cd ${SPARK_HOME} && ./spark-class org.apache.spark.examples.JavaWordCount local " + kmeans)
+ sh.exec("cd ${SPARK_HOME} && ./bin/spark-submit --class org.apache.spark.examples.JavaWordCount --master local lib/spark-examples.jar " + kmeans)
logError(sh)
assertEquals(result, sh.out)
}
@@ -68,7 +68,7 @@ public class TestSparkSmoke implements Serializable {
fs.close()
String dfsname = fs_default_name + pathname
- sh.exec("cd ${SPARK_HOME} && ./spark-class org.apache.spark.examples.JavaWordCount ${SPARK_MASTER} " + dfsname)
+ sh.exec("cd ${SPARK_HOME} && ./bin/spark-submit --class org.apache.spark.examples.JavaWordCount --master ${SPARK_MASTER} lib/spark-examples.jar " + dfsname)
logError(sh)
assertEquals(result, sh.out)
}
diff --git a/bigtop-tests/test-artifacts/spark/src/main/resources/README.md b/bigtop-tests/test-artifacts/spark/src/main/resources/README.md
deleted file mode 100644
index d042d7e9..00000000
--- a/bigtop-tests/test-artifacts/spark/src/main/resources/README.md
+++ /dev/null
@@ -1,73 +0,0 @@
-# Spark
-
-Lightning-Fast Cluster Computing - <http://spark.incubator.apache.org/>
-
-
-## Online Documentation
-
-You can find the latest Spark documentation, including a programming
-guide, on the project webpage at <http://spark.incubator.apache.org/documentation.html>.
-This README file only contains basic setup instructions.
-
-
-## Building
-
-Spark requires Scala 2.9.2 (Scala 2.10 is not yet supported). The project is
-built using Simple Build Tool (SBT), which is packaged with it. To build
-Spark and its example programs, run:
-
- sbt/sbt package
-
-Spark also supports building using Maven. If you would like to build using Maven,
-see the [instructions for building Spark with Maven](http://spark.incubator.apache.org/docs/latest/building-with-maven.html)
-in the spark documentation..
-
-To run Spark, you will need to have Scala's bin directory in your `PATH`, or
-you will need to set the `SCALA_HOME` environment variable to point to where
-you've installed Scala. Scala must be accessible through one of these
-methods on your cluster's worker nodes as well as its master.
-
-To run one of the examples, use `./run <class> <params>`. For example:
-
- ./run spark.examples.SparkLR local[2]
-
-will run the Logistic Regression example locally on 2 CPUs.
-
-Each of the example programs prints usage help if no params are given.
-
-All of the Spark samples take a `<host>` parameter that is the cluster URL
-to connect to. This can be a mesos:// or spark:// URL, or "local" to run
-locally with one thread, or "local[N]" to run locally with N threads.
-
-
-## A Note About Hadoop Versions
-
-Spark uses the Hadoop core library to talk to HDFS and other Hadoop-supported
-storage systems. Because the HDFS API has changed in different versions of
-Hadoop, you must build Spark against the same version that your cluster runs.
-You can change the version by setting the `HADOOP_VERSION` variable at the top
-of `project/SparkBuild.scala`, then rebuilding Spark.
-
-
-## Configuration
-
-Please refer to the "Configuration" guide in the online documentation for a
-full overview on how to configure Spark. At the minimum, you will need to
-create a `conf/spark-env.sh` script (copy `conf/spark-env.sh.template`) and
-set the following two variables:
-
-- `SCALA_HOME`: Location where Scala is installed.
-
-- `MESOS_NATIVE_LIBRARY`: Your Mesos library (only needed if you want to run
- on Mesos). For example, this might be `/usr/local/lib/libmesos.so` on Linux.
-
-
-## Contributing to Spark
-
-Contributions via GitHub pull requests are gladly accepted from their original
-author. Along with any pull requests, please state that the contribution is
-your original work and that you license the work to the project under the
-project's open source license. Whether or not you state this explicitly, by
-submitting any copyrighted material via pull request, email, or other means
-you agree to license the material under the project's open source license and
-warrant that you have the legal authority to do so.
diff --git a/bigtop-tests/test-artifacts/spark/src/main/resources/kmeans_data.txt b/bigtop-tests/test-artifacts/spark/src/main/resources/kmeans_data.txt
deleted file mode 100644
index 338664f7..00000000
--- a/bigtop-tests/test-artifacts/spark/src/main/resources/kmeans_data.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-0.0 0.0 0.0
-0.1 0.1 0.1
-0.2 0.2 0.2
-9.0 9.0 9.0
-9.1 9.1 9.1
-9.2 9.2 9.2
diff --git a/bigtop.mk b/bigtop.mk
index bec0f822..6b1ab0f2 100644
--- a/bigtop.mk
+++ b/bigtop.mk
@@ -247,8 +247,8 @@ $(eval $(call PACKAGE,crunch,CRUNCH))
SPARK_NAME=spark
SPARK_RELNOTES_NAME=Spark
SPARK_PKG_NAME=spark-core
-SPARK_BASE_VERSION=1.1.0
-SPARK_PKG_VERSION=1.1.0
+SPARK_BASE_VERSION=1.2.1
+SPARK_PKG_VERSION=1.2.1
SPARK_RELEASE_VERSION=1
SPARK_TARBALL_DST=spark-$(SPARK_BASE_VERSION).tar.gz
SPARK_TARBALL_SRC=spark-$(SPARK_BASE_VERSION).tgz
diff --git a/pom.xml b/pom.xml
index 1e8df779..38516cb8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -48,7 +48,7 @@
<zookeeper.version>3.4.5</zookeeper.version>
<giraph.version>1.0.0</giraph.version>
<solr.version>4.6.0</solr.version>
- <spark.version>0.9.1</spark.version>
+ <spark.version>1.2.1</spark.version>
<kafka.version>0.8.1.1</kafka.version>
<phoenix.version>4.2.2</phoenix.version>
<spark-smoke.version>${project.version}</spark-smoke.version>