BIGTOP-1648. Update Spark 1.x to Spark 1.2.1.

Signed-off-by: jayunit100 <jay@apache.org>
author: Youngwoo Kim <warwithin@gmail.com> 2015-02-08 18:50:50 +0900
committer: jayunit100 <jay@apache.org> 2015-03-01 22:56:14 -0500
commit: e4288e08d7bab850d8cf4f46fb5f8cd9f2a397ca (patch)
tree: 605fa5fdad577e9afd93bbd94122ac06e23878e3
parent: d32c468e8081f13c857c26b3d9ad1e0a5ad1ac38 (diff)
17 files changed, 321 insertions, 191 deletions
diff --git a/bigtop-packages/src/common/hadoop/init-hcfs.json b/bigtop-packages/src/common/hadoop/init-hcfs.json
index d8825aa3..bd97a220 100644
--- a/bigtop-packages/src/common/hadoop/init-hcfs.json
+++ b/bigtop-packages/src/common/hadoop/init-hcfs.json
@@ -86,7 +86,8 @@
     ["/user/oozie/share/lib/hive", null, null, null],
     ["/user/oozie/share/lib/mapreduce-streaming", null, null, null],
     ["/user/oozie/share/lib/distcp", null, null, null],
-    ["/user/oozie/share/lib/pig", null, null, null]
+    ["/user/oozie/share/lib/pig", null, null, null],
+    ["/var/log/spark/apps","1777","spark","spark"]
  ],
   "user": [
     ["tom", "0755", null],
diff --git a/bigtop-packages/src/common/hadoop/init-hdfs.sh b/bigtop-packages/src/common/hadoop/init-hdfs.sh
index 3a5fe361..1bf820f6 100755
--- a/bigtop-packages/src/common/hadoop/init-hdfs.sh
+++ b/bigtop-packages/src/common/hadoop/init-hdfs.sh
@@ -69,6 +69,11 @@ su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /user/oozie/share/lib/hive'
 su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /user/oozie/share/lib/mapreduce-streaming'
 su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /user/oozie/share/lib/distcp'
 su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /user/oozie/share/lib/pig'
+# Event log directory for Apache Spark
+su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir -p /var/log/spark/apps'
+su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chmod -R 1777 /var/log/spark/apps'
+su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chown spark:spark /var/log/spark/apps'
+
 # Copy over files from local filesystem to HDFS that oozie might need
 if ls /usr/lib/hive/lib/*.jar &> /dev/null; then
   su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -put /usr/lib/hive/lib/*.jar /user/oozie/share/lib/hive'
diff --git a/bigtop-packages/src/common/spark/compute-classpath.sh b/bigtop-packages/src/common/spark/compute-classpath.sh
deleted file mode 100644
index eb6a8076..00000000
--- a/bigtop-packages/src/common/spark/compute-classpath.sh
+++ /dev/null
@@ -1,74 +0,0 @@
-#!/bin/bash
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# This script computes Spark's classpath and prints it to stdout; it's used by both the "run"
-# script and the ExecutorRunner in standalone cluster mode.
-
-SCALA_VERSION=2.10
-
-# Figure out where Spark is installed
-FWDIR="$(cd `dirname $0`/..; pwd)"
-
-# Load environment variables from conf/spark-env.sh, if it exists
-if [ -e $FWDIR/conf/spark-env.sh ] ; then
-  . $FWDIR/conf/spark-env.sh
-fi
-
-CORE_DIR="$FWDIR/core"
-ASSEMBLY_DIR="$FWDIR/lib"
-PYSPARK_DIR="$FWDIR/python"
-
-# Build up classpath
-CLASSPATH="$SPARK_CLASSPATH"
-CLASSPATH="$CLASSPATH:$FWDIR/conf"
-CLASSPATH="$CLASSPATH:$ASSEMBLY_DIR/*"
-if [ -e "$PYSPARK_DIR" ]; then
-  for jar in `find $PYSPARK_DIR/lib -name '*jar'`; do
-    CLASSPATH="$CLASSPATH:$jar"
-  done
-fi
-
-# Add hadoop conf dir - else FileSystem.*, etc fail !
-# Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts
-# the configuration files.
-
-export DEFAULT_HADOOP=/usr/lib/hadoop
-export DEFAULT_HADOOP_CONF=/etc/hadoop/conf
-export HADOOP_HOME=${HADOOP_HOME:-$DEFAULT_HADOOP}
-export HADOOP_HDFS_HOME=${HADOOP_HDFS_HOME:-${HADOOP_HOME}/../hadoop-hdfs}
-export HADOOP_MAPRED_HOME=${HADOOP_MAPRED_HOME:-${HADOOP_HOME}/../hadoop-mapreduce}
-export HADOOP_YARN_HOME=${HADOOP_YARN_HOME:-${HADOOP_HOME}/../hadoop-yarn}
-export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-$DEFAULT_HADOOP_CONF}
-
-CLASSPATH="$CLASSPATH:$HADOOP_CONF_DIR"
-if [ "x" != "x$YARN_CONF_DIR" ]; then
-  CLASSPATH="$CLASSPATH:$YARN_CONF_DIR"
-fi
-# Let's make sure that all needed hadoop libs are added properly
-CLASSPATH="$CLASSPATH:$HADOOP_HOME/*:$HADOOP_HDFS_HOME/*:$HADOOP_YARN_HOME/*:$HADOOP_MAPRED_HOME/*"
-# Add Scala standard library
-if [ -z "$SCALA_LIBRARY_PATH" ]; then
-  if [ -z "$SCALA_HOME" ]; then
-    echo "SCALA_HOME is not set" >&2
-    exit 1
-  fi
-  SCALA_LIBRARY_PATH="$SCALA_HOME/lib"
-fi
-CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/scala-library.jar"
-CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/scala-compiler.jar"
-CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/jline.jar"
-
-echo "$CLASSPATH"
diff --git a/bigtop-packages/src/common/spark/do-component-build b/bigtop-packages/src/common/spark/do-component-build
index 5327c35d..deecbe2e 100644
--- a/bigtop-packages/src/common/spark/do-component-build
+++ b/bigtop-packages/src/common/spark/do-component-build
@@ -23,9 +23,6 @@ if [ "x$SCALA_HOME" = "x" ]; then
     exit 2
 fi
 
-# FIXME: this is fixed in Spark 1.x
-sed -i -e '/<dependencies>/a<dependency><groupId>commons-cli</groupId><artifactId>commons-cli</artifactId><version>1.2</version></dependency>' assembly/pom.xml
-
 BUILD_OPTS="-Divy.home=${HOME}/.ivy2 -Dsbt.ivy.home=${HOME}/.ivy2 -Duser.home=${HOME} \
             -Drepo.maven.org=$IVY_MIRROR_PROP \
             -Dreactor.repo=file://${HOME}/.m2/repository \
@@ -37,4 +34,4 @@ BUILD_OPTS="-Divy.home=${HOME}/.ivy2 -Dsbt.ivy.home=${HOME}/.ivy2 -Duser.home=${
 #        http://maven.40175.n5.nabble.com/Not-finding-artifact-in-local-repo-td3727753.html
 export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m -XX:PermSize=1024m -XX:MaxPermSize=1024m"
 
-mvn -Pbigtop-dist -Pyarn -Phive $BUILD_OPTS install
+mvn -Pbigtop-dist -Pyarn -Phive -Phive-thriftserver $BUILD_OPTS install
diff --git a/bigtop-packages/src/common/spark/install_spark.sh b/bigtop-packages/src/common/spark/install_spark.sh
index 3dfd2f6e..e28edd3b 100644
--- a/bigtop-packages/src/common/spark/install_spark.sh
+++ b/bigtop-packages/src/common/spark/install_spark.sh
@@ -125,6 +125,7 @@ install -d -m 0755 $PREFIX/$LIB_DIR/lib
 install -d -m 0755 $PREFIX/$LIB_DIR/bin
 install -d -m 0755 $PREFIX/$LIB_DIR/sbin
 install -d -m 0755 $PREFIX/$DOC_DIR
+install -d -m 0755 $PREFIX/$EXAMPLES_DIR
 
 install -d -m 0755 $PREFIX/var/lib/spark/
 install -d -m 0755 $PREFIX/var/log/spark/
@@ -139,19 +140,18 @@ tar --wildcards -C $PREFIX/$LIB_DIR/ -zxf ${BUILD_DIR}/assembly/target/spark-ass
 rm -rf $PREFIX/$LIB_DIR/bin/*.cmd
 
 # Examples jar
-cp ${BUILD_DIR}/examples/target/spark-examples*${SPARK_VERSION}.jar $PREFIX/$LIB_DIR/lib
-sed -i -e "s|lib/spark-examples-\*hadoop\*.jar|lib/spark-examples_\*.jar|" $PREFIX/$LIB_DIR/bin/run-example
+cp ${BUILD_DIR}/examples/target/spark-examples*${SPARK_VERSION}.jar $PREFIX/$LIB_DIR/lib/spark-examples-${SPARK_VERSION}-hadoop${HADOOP_VERSION}.jar
 
 # Examples src
-install -d -m 0755 $PREFIX/$EXAMPLES_DIR
 cp -ra ${BUILD_DIR}/examples/src $PREFIX/$EXAMPLES_DIR/
+ln -s $EXAMPLES_DIR $PREFIX/$LIB_DIR/examples
+
+# Data
+cp -ra ${BUILD_DIR}/data $PREFIX/$LIB_DIR/
 
 chmod 755 $PREFIX/$LIB_DIR/bin/*
 chmod 755 $PREFIX/$LIB_DIR/sbin/*
 
-cp -a ${SOURCE_DIR}/compute-classpath.sh $PREFIX/$LIB_DIR/bin/
-chmod 755 $PREFIX/$LIB_DIR/bin/compute-classpath.sh
-
 # Copy in the configuration files
 install -d -m 0755 $PREFIX/$CONF_DIR
 cp -a ${BUILD_DIR}/conf/* $PREFIX/$CONF_DIR
@@ -162,7 +162,7 @@ ln -s /etc/spark/conf $PREFIX/$LIB_DIR/conf
 install -d -m 0755 $PREFIX/$BIN_DIR
 for wrap in sbin/spark-executor bin/spark-shell bin/spark-submit; do
   cat > $PREFIX/$BIN_DIR/`basename $wrap` <<EOF
-#!/bin/bash 
+#!/bin/bash
 
 # Autodetect JAVA_HOME if not defined
 . /usr/lib/bigtop-utils/bigtop-detect-javahome
@@ -173,6 +173,7 @@ EOF
 done
 
 cat >> $PREFIX/$CONF_DIR/spark-env.sh <<EOF
+export SPARK_SCALA_VERSION=2.10
 
 ### Let's run everything with JVM runtime, instead of Scala
 export SPARK_LAUNCH_WITH_SCALA=0
@@ -184,6 +185,16 @@ export SPARK_WORKER_PORT=7078
 export SPARK_WORKER_WEBUI_PORT=18081
 export SPARK_WORKER_DIR=/var/run/spark/work
 export SPARK_LOG_DIR=/var/log/spark
+export SPARK_HISTORY_OPTS="\$SPARK_HISTORY_OPTS -Dspark.history.fs.logDirectory=hdfs:///var/log/spark/apps -Dspark.history.ui.port=18082"
+
+export HADOOP_HOME=\${HADOOP_HOME:-/usr/lib/hadoop}
+export HADOOP_HDFS_HOME=\${HADOOP_HDFS_HOME:-\${HADOOP_HOME}/../hadoop-hdfs}
+export HADOOP_MAPRED_HOME=\${HADOOP_MAPRED_HOME:-\${HADOOP_HOME}/../hadoop-mapreduce}
+export HADOOP_YARN_HOME=\${HADOOP_YARN_HOME:-\${HADOOP_HOME}/../hadoop-yarn}
+export HADOOP_CONF_DIR=\${HADOOP_CONF_DIR:-/etc/hadoop/conf}
+
+# Let's make sure that all needed hadoop libs are added properly
+CLASSPATH="\$CLASSPATH:\$HADOOP_HOME/*:\$HADOOP_HDFS_HOME/*:\$HADOOP_YARN_HOME/*:\$HADOOP_MAPRED_HOME/*"
 
 if [ -n "\$HADOOP_HOME" ]; then
   export SPARK_LIBRARY_PATH=\$SPARK_LIBRARY_PATH:\${HADOOP_HOME}/lib/native
@@ -201,6 +212,7 @@ EOF
 ln -s /var/run/spark/work $PREFIX/$LIB_DIR/work
 
 cp -r ${BUILD_DIR}/python ${PREFIX}/${INSTALLED_LIB_DIR}/
+rm -f ${PREFIX}/${INSTALLED_LIB_DIR}/python/.gitignore
 cat > $PREFIX/$BIN_DIR/pyspark <<EOF
 #!/bin/bash
 
@@ -214,5 +226,7 @@ EOF
 chmod 755 $PREFIX/$BIN_DIR/pyspark
 
 touch $PREFIX/$LIB_DIR/RELEASE
-
 cp ${BUILD_DIR}/{LICENSE,NOTICE} ${PREFIX}/${LIB_DIR}/
+
+# Version-less symlinks
+(cd $PREFIX/$LIB_DIR/lib; ln -s spark-assembly*.jar spark-assembly.jar; ln -s spark-examples*.jar spark-examples.jar)
diff --git a/bigtop-packages/src/common/spark/spark-history-server.svc b/bigtop-packages/src/common/spark/spark-history-server.svc
new file mode 100644
index 00000000..0410d160
--- /dev/null
+++ b/bigtop-packages/src/common/spark/spark-history-server.svc
@@ -0,0 +1,70 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+TYPE="history-server"
+DAEMON="spark-${TYPE}"
+DESC="Spark ${TYPE}"
+EXEC_PATH="/usr/lib/spark/bin/spark-class"
+SVC_USER="spark"
+WORKING_DIR="/var/lib/spark"
+DAEMON_FLAGS=""
+CONF_DIR="/etc/spark/conf"
+PIDFILE="/var/run/spark/${DAEMON}.pid"
+
+generate_start() {
+# Print the start() function for the generated init script (quoted heredoc: emitted verbatim).
+cat <<'__EOT__'
+start() {
+    [ -x "$EXEC_PATH" ] || exit $ERROR_PROGRAM_NOT_INSTALLED
+    log_success_msg "Starting $DESC (${DAEMON}): "
+
+    checkstatusofproc
+    status=$?
+    if [ "$status" -eq "$STATUS_RUNNING" ]; then
+        log_success_msg "${DESC} is running"
+        exit 0
+    fi
+
+    LOG_FILE=/var/log/spark/${DAEMON}.out
+
+    su -s /bin/bash $SVC_USER -c "nohup nice -n 0 \
+        ${EXEC_PATH} org.apache.spark.deploy.history.HistoryServer $DAEMON_FLAGS \
+        > $LOG_FILE 2>&1 & "'echo $!' > "$PIDFILE"
+
+    sleep 3
+
+    checkstatusofproc
+    RETVAL=$?
+    [ $RETVAL -eq $STATUS_RUNNING ] && touch $LOCKFILE
+    return $RETVAL
+}
+__EOT__
+
+}
+
+generate_stop() {
+
+cat <<'__EOT__'
+stop() {
+    log_success_msg "Stopping $DESC (${DAEMON}): "
+    killproc -p $PIDFILE java
+    RETVAL=$?
+
+    [ $RETVAL -eq $RETVAL_SUCCESS ] && rm -f $LOCKFILE $PIDFILE
+    return $RETVAL
+}
+__EOT__
+
+}
diff --git a/bigtop-packages/src/common/spark/spark-thriftserver.svc b/bigtop-packages/src/common/spark/spark-thriftserver.svc
new file mode 100644
index 00000000..4eba4a4a
--- /dev/null
+++ b/bigtop-packages/src/common/spark/spark-thriftserver.svc
@@ -0,0 +1,72 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+TYPE="thriftserver"
+DAEMON="spark-${TYPE}"
+DESC="Spark ${TYPE}"
+EXEC_PATH="/usr/lib/spark/sbin/spark-daemon.sh"
+SVC_USER="spark"
+WORKING_DIR="/var/lib/spark"
+DAEMON_FLAGS=""
+CONF_DIR="/etc/spark/conf"
+PIDFILE="/var/run/spark/${DAEMON}.pid"
+
+generate_start() {
+# Print the start() function for the generated init script (quoted heredoc: emitted verbatim).
+cat <<'__EOT__'
+start() {
+    [ -x "$EXEC_PATH" ] || exit $ERROR_PROGRAM_NOT_INSTALLED
+    log_success_msg "Starting $DESC (${DAEMON}): "
+
+    checkstatusofproc
+    status=$?
+    if [ "$status" -eq "$STATUS_RUNNING" ]; then
+        log_success_msg "${DESC} is running"
+        exit 0
+    fi
+
+    function usage {
+      echo
+    }
+    export SUBMIT_USAGE_FUNCTION=usage
+
+    su -s /bin/bash $SVC_USER -c " \
+        ${EXEC_PATH} spark-submit org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 1 $DAEMON_FLAGS"
+
+    sleep 3
+
+    checkstatusofproc
+    RETVAL=$?
+    [ $RETVAL -eq $STATUS_RUNNING ] && touch $LOCKFILE
+    return $RETVAL
+}
+__EOT__
+
+}
+
+generate_stop() {
+
+cat <<'__EOT__'
+stop() {
+    su -s /bin/bash $SVC_USER -c " \
+        ${EXEC_PATH} stop org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 1"
+    RETVAL=$?
+
+    [ $RETVAL -eq $RETVAL_SUCCESS ] && rm -f $LOCKFILE $PIDFILE
+    return $RETVAL
+}
+__EOT__
+
+}
diff --git a/bigtop-packages/src/deb/spark/control b/bigtop-packages/src/deb/spark/control
index ae2f07eb..856d622e 100644
--- a/bigtop-packages/src/deb/spark/control
+++ b/bigtop-packages/src/deb/spark/control
@@ -23,7 +23,7 @@ Homepage: http://spark.apache.org/
 
 Package: spark-core
 Architecture: all
-Depends: bigtop-utils (>= 0.7), hadoop-client
+Depends: adduser, bigtop-utils (>= 0.7), hadoop-client
 Description: Lightning-Fast Cluster Computing
  Spark is a MapReduce-like cluster computing framework designed to support
  low-latency iterative jobs and interactive use from an interpreter. It is
@@ -48,3 +48,15 @@ Architecture: all
 Depends: spark-core (= ${source:Version}), python
 Description: Python client for Spark
  Includes PySpark, an interactive Python shell for Spark, and related libraries
+
+Package: spark-history-server
+Architecture: all
+Depends: spark-core (= ${source:Version})
+Description: History server for Apache Spark
+ History server for Apache Spark
+
+Package: spark-thriftserver
+Architecture: all
+Depends: spark-core (= ${source:Version})
+Description: Thrift server for Spark SQL
+ Thrift server for Spark SQL
diff --git a/bigtop-packages/src/deb/spark/rules b/bigtop-packages/src/deb/spark/rules
index a21ba820..58815189 100644
--- a/bigtop-packages/src/deb/spark/rules
+++ b/bigtop-packages/src/deb/spark/rules
@@ -28,7 +28,7 @@ export DH_OPTIONS
 override_dh_auto_build:
 	bash debian/do-component-build
 
-svcs=spark-master spark-worker
+svcs=spark-master spark-worker spark-history-server spark-thriftserver
 
 $(svcs): debian/init.d.tmpl
 	bash $< debian/$@.svc deb debian/$@.init
@@ -36,7 +36,7 @@ $(svcs): debian/init.d.tmpl
 
 override_dh_auto_install: $(svcs)
 	bash -x debian/install_spark.sh \
-	  --build-dir=`pwd` \
-          --doc-dir=/usr/share/doc/spark \
-          --source-dir=debian \
-	  --prefix=debian/tmp
+	--build-dir=`pwd` \
+	--doc-dir=/usr/share/doc/spark \
+	--source-dir=debian \
+	--prefix=debian/tmp
diff --git a/bigtop-packages/src/deb/spark/spark-core.install b/bigtop-packages/src/deb/spark/spark-core.install
index d1efdfdb..77a6bd8b 100644
--- a/bigtop-packages/src/deb/spark/spark-core.install
+++ b/bigtop-packages/src/deb/spark/spark-core.install
@@ -2,16 +2,24 @@
 /usr/bin/spark-executor
 /usr/bin/spark-submit
 /usr/bin/spark-shell
-/usr/lib/spark/bin/
-/usr/lib/spark/lib
-/usr/lib/spark/sbin
-/usr/lib/spark/ui-resources
-/usr/lib/spark/conf
 /usr/lib/spark/LICENSE
-/usr/lib/spark/NOTICE
 /usr/lib/spark/RELEASE
-/usr/share/doc/spark
+/usr/lib/spark/NOTICE
+/usr/lib/spark/bin/beeline
+/usr/lib/spark/bin/compute-classpath.sh
+/usr/lib/spark/bin/load-spark-env.sh
+/usr/lib/spark/bin/run-example
+/usr/lib/spark/bin/spark-class
+/usr/lib/spark/bin/spark-shell
+/usr/lib/spark/bin/spark-sql
+/usr/lib/spark/bin/spark-submit
+/usr/lib/spark/bin/utils.sh
+/usr/lib/spark/conf
+/usr/lib/spark/sbin
 /usr/lib/spark/work
+/usr/lib/spark/examples
+/usr/lib/spark/data
+/usr/share/doc/spark
 /var/lib/spark/
 /var/log/spark/
 /var/run/spark/
diff --git a/bigtop-packages/src/rpm/spark/SPECS/spark.spec b/bigtop-packages/src/rpm/spark/SPECS/spark.spec
index 81d658e8..d8a5c10c 100644
--- a/bigtop-packages/src/rpm/spark/SPECS/spark.spec
+++ b/bigtop-packages/src/rpm/spark/SPECS/spark.spec
@@ -23,7 +23,7 @@
 %define config_spark %{etc_spark}/conf
 %define bin /usr/bin
 %define man_dir /usr/share/man
-%define spark_services master worker
+%define spark_services master worker history-server thriftserver
 
 %if  %{?suse_version:1}0
 %define doc_spark %{_docdir}/spark
@@ -50,8 +50,10 @@ Source1: do-component-build
 Source2: install_%{spark_name}.sh
 Source3: spark-master.svc
 Source4: spark-worker.svc
-Source5: compute-classpath.sh
 Source6: init.d.tmpl
+Source7: spark-history-server.svc
+Source8: spark-thriftserver.svc
+Source9: bigtop.bom
 Requires: bigtop-utils >= 0.7, hadoop-client
 Requires(preun): /sbin/service
 
@@ -101,6 +103,22 @@ Requires: spark-core = %{version}-%{release}, python
 %description -n spark-python
 Includes PySpark, an interactive Python shell for Spark, and related libraries
 
+%package -n spark-history-server
+Summary: History server for Apache Spark
+Group: Development/Libraries
+Requires: spark-core = %{version}-%{release}
+
+%description -n spark-history-server
+History server for Apache Spark
+
+%package -n spark-thriftserver
+Summary: Thrift server for Spark SQL
+Group: Development/Libraries
+Requires: spark-core = %{version}-%{release}
+
+%description -n spark-thriftserver
+Thrift server for Spark SQL
+
 %prep
 %setup -n %{spark_name}-%{spark_base_version}
 
@@ -151,17 +169,22 @@ done
 %defattr(-,root,root,755)
 %config(noreplace) %{config_spark}.dist
 %doc %{doc_spark}
-%{lib_spark}
+%{lib_spark}/conf
+%{lib_spark}/LICENSE
+%{lib_spark}/RELEASE
+%{lib_spark}/NOTICE
 %{lib_spark}/bin
-%{lib_spark}/sbin
 %{lib_spark}/lib
-%exclude %{lib_spark}/bin/pyspark
+%{lib_spark}/sbin
+%{lib_spark}/data
+%{lib_spark}/examples
+%{lib_spark}/work
+%exclude %{bin_spark}/pyspark
 %exclude %{lib_spark}/python
 %{etc_spark}
 %attr(0755,spark,spark) %{var_lib_spark}
 %attr(0755,spark,spark) %{var_run_spark}
 %attr(0755,spark,spark) %{var_log_spark}
-%attr(0755,root,root) %{bin_spark}
 %{bin}/spark-shell
 %{bin}/spark-executor
 %{bin}/spark-submit
@@ -189,3 +212,5 @@ if [ $1 -ge 1 ]; then \
 fi
 %service_macro spark-master
 %service_macro spark-worker
+%service_macro spark-history-server
+%service_macro spark-thriftserver
diff --git a/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkExample.groovy b/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkExample.groovy
new file mode 100644
index 00000000..02cd161f
--- /dev/null
+++ b/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkExample.groovy
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bigtop.itest.spark
+
+import org.apache.bigtop.itest.shell.Shell
+import org.apache.hadoop.fs.FileSystem
+import org.apache.hadoop.fs.Path
+
+import org.junit.Test
+import org.junit.BeforeClass
+import org.junit.AfterClass
+import static org.junit.Assert.assertEquals
+import static org.junit.Assert.assertTrue
+import static org.junit.Assert.assertNotNull
+
+import static org.apache.bigtop.itest.LogErrorsUtils.logError
+
+public class TestSparkExample {
+
+  private static String SPARK_HOME = System.getenv("SPARK_HOME");
+  private static String SPARK_MASTER = System.getenv("SPARK_MASTER");
+  static {
+    assertNotNull("SPARK_HOME has to be set to run this test",  SPARK_HOME);
+    assertNotNull("SPARK_MASTER has to be set to run this test", SPARK_MASTER);
+  }
+  static final String SPARK_EXAMPLES_DIR = SPARK_HOME + "/examples";
+  static final String sparkExamplesJarFile = "spark-examples.jar";
+  static final String SPARK_EXAMPLES_JAR = SPARK_HOME + "/lib/" + sparkExamplesJarFile;
+
+  static Shell sh = new Shell("/bin/bash -s");
+
+  @BeforeClass
+  static void setUp() {
+
+  }
+
+  @AfterClass
+  public static void tearDown() {
+
+  }
+
+  @Test
+  void testSparkExample() {
+    def examples = ["SparkPi", "JavaSparkPi"];
+    examples.each() {
+      String exampleClass = "org.apache.spark.examples.${it}"
+      sh.exec("cd ${SPARK_HOME} && ./bin/spark-submit --class " + exampleClass + " --master ${SPARK_MASTER} " +  SPARK_EXAMPLES_JAR);
+      logError(sh);
+      assertTrue("Running Spark example ${it} failed", sh.getRet() == 0);
+    }
+  }
+
+  @Test
+  void testSparkPythonExample() {
+    def pyExamples = ["pi.py"];  // script names resolved under SPARK_EXAMPLES_DIR/src/main/python
+    pyExamples.each() {
+      sh.exec("cd ${SPARK_HOME} && ./bin/spark-submit --master ${SPARK_MASTER} " + SPARK_EXAMPLES_DIR + "/src/main/python/${it}");
+      logError(sh);
+      assertTrue("Running Spark Python example ${it} failed", sh.getRet() == 0);
+    }
+  }
+
+}
diff --git a/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkSmoke.groovy b/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkSmoke.groovy
index 4fcb67e9..4c601a0b 100644
--- a/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkSmoke.groovy
+++ b/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkSmoke.groovy
@@ -40,7 +40,7 @@ public class TestSparkSmoke implements Serializable {
   private static String pwd = ""
   private static Configuration conf
   static Shell sh = new Shell("/bin/bash -s")
-  def result = ["0.2: 3", "0.1: 3", "0.0: 3", "9.0: 3", "9.2: 3", "9.1: 3"]
+  def result = ["9.1: 3", "9.2: 3", "0.2: 3", "9.0: 3", "0.0: 3", "0.1: 3"]
 
   @BeforeClass
   static void setUp() {
@@ -53,7 +53,7 @@ public class TestSparkSmoke implements Serializable {
   @Test
   void ShellTest() {
     String kmeans = "file://" + pwd + "/kmeans_data.txt"
-    sh.exec("cd ${SPARK_HOME} && ./spark-class org.apache.spark.examples.JavaWordCount local " + kmeans)
+    sh.exec("cd ${SPARK_HOME} && ./bin/spark-submit --class org.apache.spark.examples.JavaWordCount --master local lib/spark-examples.jar " + kmeans)
     logError(sh)
     assertEquals(result, sh.out)
   }
@@ -68,7 +68,7 @@ public class TestSparkSmoke implements Serializable {
     fs.close()
 
     String dfsname = fs_default_name + pathname
-    sh.exec("cd ${SPARK_HOME} && ./spark-class org.apache.spark.examples.JavaWordCount ${SPARK_MASTER} " + dfsname)
+    sh.exec("cd ${SPARK_HOME} && ./bin/spark-submit --class org.apache.spark.examples.JavaWordCount --master ${SPARK_MASTER} lib/spark-examples.jar " + dfsname)
     logError(sh)
     assertEquals(result, sh.out)
   }
diff --git a/bigtop-tests/test-artifacts/spark/src/main/resources/README.md b/bigtop-tests/test-artifacts/spark/src/main/resources/README.md
deleted file mode 100644
index d042d7e9..00000000
--- a/bigtop-tests/test-artifacts/spark/src/main/resources/README.md
+++ /dev/null
@@ -1,73 +0,0 @@
-# Spark
-
-Lightning-Fast Cluster Computing - <http://spark.incubator.apache.org/>
-
-
-## Online Documentation
-
-You can find the latest Spark documentation, including a programming
-guide, on the project webpage at <http://spark.incubator.apache.org/documentation.html>.
-This README file only contains basic setup instructions.
-
-
-## Building
-
-Spark requires Scala 2.9.2 (Scala 2.10 is not yet supported). The project is
-built using Simple Build Tool (SBT), which is packaged with it. To build
-Spark and its example programs, run:
-
-    sbt/sbt package
-
-Spark also supports building using Maven. If you would like to build using Maven,
-see the [instructions for building Spark with Maven](http://spark.incubator.apache.org/docs/latest/building-with-maven.html)
-in the spark documentation..
-
-To run Spark, you will need to have Scala's bin directory in your `PATH`, or
-you will need to set the `SCALA_HOME` environment variable to point to where
-you've installed Scala. Scala must be accessible through one of these
-methods on your cluster's worker nodes as well as its master.
-
-To run one of the examples, use `./run <class> <params>`. For example:
-
-    ./run spark.examples.SparkLR local[2]
-
-will run the Logistic Regression example locally on 2 CPUs.
-
-Each of the example programs prints usage help if no params are given.
-
-All of the Spark samples take a `<host>` parameter that is the cluster URL
-to connect to. This can be a mesos:// or spark:// URL, or "local" to run
-locally with one thread, or "local[N]" to run locally with N threads.
-
-
-## A Note About Hadoop Versions
-
-Spark uses the Hadoop core library to talk to HDFS and other Hadoop-supported
-storage systems. Because the HDFS API has changed in different versions of
-Hadoop, you must build Spark against the same version that your cluster runs.
-You can change the version by setting the `HADOOP_VERSION` variable at the top
-of `project/SparkBuild.scala`, then rebuilding Spark.
-
-
-## Configuration
-
-Please refer to the "Configuration" guide in the online documentation for a
-full overview on how to configure Spark. At the minimum, you will need to
-create a `conf/spark-env.sh` script (copy `conf/spark-env.sh.template`) and
-set the following two variables:
-
-- `SCALA_HOME`: Location where Scala is installed.
-
-- `MESOS_NATIVE_LIBRARY`: Your Mesos library (only needed if you want to run
-  on Mesos). For example, this might be `/usr/local/lib/libmesos.so` on Linux.
-
-
-## Contributing to Spark
-
-Contributions via GitHub pull requests are gladly accepted from their original
-author. Along with any pull requests, please state that the contribution is
-your original work and that you license the work to the project under the
-project's open source license. Whether or not you state this explicitly, by
-submitting any copyrighted material via pull request, email, or other means
-you agree to license the material under the project's open source license and
-warrant that you have the legal authority to do so.
diff --git a/bigtop-tests/test-artifacts/spark/src/main/resources/kmeans_data.txt b/bigtop-tests/test-artifacts/spark/src/main/resources/kmeans_data.txt
deleted file mode 100644
index 338664f7..00000000
--- a/bigtop-tests/test-artifacts/spark/src/main/resources/kmeans_data.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-0.0 0.0 0.0
-0.1 0.1 0.1
-0.2 0.2 0.2
-9.0 9.0 9.0
-9.1 9.1 9.1
-9.2 9.2 9.2
diff --git a/bigtop.mk b/bigtop.mk
index bec0f822..6b1ab0f2 100644
--- a/bigtop.mk
+++ b/bigtop.mk
@@ -247,8 +247,8 @@ $(eval $(call PACKAGE,crunch,CRUNCH))
 SPARK_NAME=spark
 SPARK_RELNOTES_NAME=Spark
 SPARK_PKG_NAME=spark-core
-SPARK_BASE_VERSION=1.1.0
-SPARK_PKG_VERSION=1.1.0
+SPARK_BASE_VERSION=1.2.1
+SPARK_PKG_VERSION=1.2.1
 SPARK_RELEASE_VERSION=1
 SPARK_TARBALL_DST=spark-$(SPARK_BASE_VERSION).tar.gz
 SPARK_TARBALL_SRC=spark-$(SPARK_BASE_VERSION).tgz
diff --git a/pom.xml b/pom.xml
index 1e8df779..38516cb8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -48,7 +48,7 @@
     <zookeeper.version>3.4.5</zookeeper.version>
     <giraph.version>1.0.0</giraph.version>
     <solr.version>4.6.0</solr.version>
-    <spark.version>0.9.1</spark.version>
+    <spark.version>1.2.1</spark.version>
     <kafka.version>0.8.1.1</kafka.version>
     <phoenix.version>4.2.2</phoenix.version>
     <spark-smoke.version>${project.version}</spark-smoke.version>
author	Youngwoo Kim <warwithin@gmail.com>	2015-02-08 18:50:50 +0900
committer	jayunit100 <jay@apache.org>	2015-03-01 22:56:14 -0500
commit	e4288e08d7bab850d8cf4f46fb5f8cd9f2a397ca (patch)
tree	605fa5fdad577e9afd93bbd94122ac06e23878e3
parent	d32c468e8081f13c857c26b3d9ad1e0a5ad1ac38 (diff)