about summary refs log tree commit diff
diff options
context:
space:
mode:
author Jonathan Kelly <jonathak@amazon.com> 2015-10-02 15:13:18 -0700
committer Konstantin Boudnik <cos@apache.org> 2015-11-02 13:02:28 -0800
commit 05d9fada278f23c97079afb2429b590afa100b1e (patch)
tree 5b7c513e3a193d16df6ddbb5824565b9fa591e59
parent bf67f6e5bf72934c577f2eeaabe063aff3a9ca99 (diff)
BIGTOP-2104. Packages upgrade to Spark 1.5.1
-rw-r--r-- bigtop-packages/src/common/spark/do-component-build 15
-rw-r--r-- bigtop-packages/src/common/spark/install_spark.sh 79
-rw-r--r-- bigtop-packages/src/common/spark/patch0.diff 11
-rwxr-xr-x bigtop-packages/src/common/spark/spark-env.sh 43
-rw-r--r-- bigtop-packages/src/deb/spark/control 15
-rw-r--r-- bigtop-packages/src/deb/spark/spark-core.install 14
-rw-r--r-- bigtop-packages/src/deb/spark/spark-datanucleus.install 2
-rw-r--r-- bigtop-packages/src/deb/spark/spark-extras.install 1
-rw-r--r-- bigtop-packages/src/deb/spark/spark-yarn-shuffle.install 2
-rw-r--r-- bigtop-packages/src/rpm/spark/SPECS/spark.spec 41
-rw-r--r-- bigtop.bom 2
11 files changed, 158 insertions, 67 deletions
diff --git a/bigtop-packages/src/common/spark/do-component-build b/bigtop-packages/src/common/spark/do-component-build
index 3d705a54..8d74f4a4 100644
--- a/bigtop-packages/src/common/spark/do-component-build
+++ b/bigtop-packages/src/common/spark/do-component-build
@@ -24,9 +24,20 @@ BUILD_OPTS="-Divy.home=${HOME}/.ivy2 -Dsbt.ivy.home=${HOME}/.ivy2 -Duser.home=${
-Dhadoop.version=$HADOOP_VERSION \
-Dyarn.version=$HADOOP_VERSION \
-Dprotobuf.version=2.5.0 \
- -DskipTests -DrecompileMode=all"
+ -DrecompileMode=all \
+ -Pbigtop-dist \
+ -Pyarn -Phadoop-2.6 \
+ -Phive -Phive-thriftserver \
+ $SPARK_BUILD_OPTS"
+
## this might be an issue at times
# http://maven.40175.n5.nabble.com/Not-finding-artifact-in-local-repo-td3727753.html
export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m -XX:PermSize=1024m -XX:MaxPermSize=1024m"
-mvn -Pbigtop-dist -Pyarn -Phive -Phive-thriftserver $BUILD_OPTS install
+mvn $BUILD_OPTS -DskipTests install
+
+# Tests must be run after Spark has already been packaged.
+# See http://spark.apache.org/docs/latest/building-spark.html#spark-tests-in-maven
+if [ "$SPARK_RUN_TESTS" = "true" ]; then
+ mvn $BUILD_OPTS test
+fi
diff --git a/bigtop-packages/src/common/spark/install_spark.sh b/bigtop-packages/src/common/spark/install_spark.sh
index 85078aea..530b3559 100644
--- a/bigtop-packages/src/common/spark/install_spark.sh
+++ b/bigtop-packages/src/common/spark/install_spark.sh
@@ -119,6 +119,10 @@ install -d -m 0755 $PREFIX/$LIB_DIR
install -d -m 0755 $PREFIX/$LIB_DIR/lib
install -d -m 0755 $PREFIX/$LIB_DIR/bin
install -d -m 0755 $PREFIX/$LIB_DIR/sbin
+install -d -m 0755 $PREFIX/$LIB_DIR/extras
+install -d -m 0755 $PREFIX/$LIB_DIR/extras/lib
+install -d -m 0755 $PREFIX/$LIB_DIR/yarn
+install -d -m 0755 $PREFIX/$LIB_DIR/yarn/lib
install -d -m 0755 $PREFIX/$DOC_DIR
install -d -m 0755 $PREFIX/$EXAMPLES_DIR
@@ -134,9 +138,15 @@ tar --wildcards -C $PREFIX/$LIB_DIR/ -zxf ${BUILD_DIR}/assembly/target/spark-ass
rm -rf $PREFIX/$LIB_DIR/bin/*.cmd
+# External/extra jars
+ls ${BUILD_DIR}/{external,extras}/*/target/*${SPARK_VERSION}.jar | grep -v 'original-\|assembly' | xargs -IJARS cp JARS $PREFIX/$LIB_DIR/extras/lib
+
# Examples jar
cp ${BUILD_DIR}/examples/target/spark-examples*${SPARK_VERSION}.jar $PREFIX/$LIB_DIR/lib/spark-examples-${SPARK_VERSION}-hadoop${HADOOP_VERSION}.jar
+# Spark YARN Shuffle jar
+cp ${BUILD_DIR}/network/yarn/target/*/spark-${SPARK_VERSION}-yarn-shuffle.jar $PREFIX/$LIB_DIR/lib/
+
# Examples src
cp -ra ${BUILD_DIR}/examples/src $PREFIX/$EXAMPLES_DIR/
ln -s $EXAMPLES_DIR $PREFIX/$LIB_DIR/examples
@@ -150,12 +160,12 @@ chmod 755 $PREFIX/$LIB_DIR/sbin/*
# Copy in the configuration files
install -d -m 0755 $PREFIX/$CONF_DIR
cp -a ${BUILD_DIR}/conf/* $PREFIX/$CONF_DIR
-cp $PREFIX/$CONF_DIR/spark-env.sh.template $PREFIX/$CONF_DIR/spark-env.sh
+cp $SOURCE_DIR/spark-env.sh $PREFIX/$CONF_DIR
ln -s /etc/spark/conf $PREFIX/$LIB_DIR/conf
# Copy in the wrappers
install -d -m 0755 $PREFIX/$BIN_DIR
-for wrap in sbin/spark-executor bin/spark-shell bin/spark-submit; do
+for wrap in bin/spark-class bin/spark-shell bin/spark-sql bin/spark-submit; do
cat > $PREFIX/$BIN_DIR/`basename $wrap` <<EOF
#!/bin/bash
@@ -167,60 +177,6 @@ EOF
chmod 755 $PREFIX/$BIN_DIR/`basename $wrap`
done
-cat >> $PREFIX/$CONF_DIR/spark-env.sh <<EOF
-export HADOOP_HOME=\${HADOOP_HOME:-/usr/lib/hadoop}
-export HADOOP_CONF_DIR=\${HADOOP_CONF_DIR:-/etc/hadoop/conf}
-
-export SPARK_MASTER_IP=\`hostname\`
-export SPARK_MASTER_PORT=7077
-export SPARK_MASTER_WEBUI_PORT=18080
-export SPARK_WORKER_PORT=7078
-export SPARK_WORKER_WEBUI_PORT=18081
-export SPARK_WORKER_DIR=/var/run/spark/work
-export SPARK_HISTORY_OPTS="\$SPARK_HISTORY_OPTS -Dspark.history.fs.logDirectory=hdfs:///var/log/spark/apps -Dspark.history.ui.port=18082"
-
-export SPARK_LOG_DIR=/var/log/spark
-
-export LD_LIBRARY_PATH=\${LD_LIBRARY_PATH}:\${HADOOP_HOME}/lib/native
-EOF
-
-cat >> $PREFIX/$CONF_DIR/hive-site.xml <<EOF
-<?xml version="1.0"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
-<configuration>
-
-<property>
- <name>javax.jdo.option.ConnectionURL</name>
- <value>jdbc:derby:;databaseName=/tmp/spark-\${user.name}/metastore_db;create=true</value>
- <description>JDBC connect string for a JDBC metastore</description>
-</property>
-
-<property>
- <name>javax.jdo.option.ConnectionDriverName</name>
- <value>org.apache.derby.jdbc.EmbeddedDriver</value>
- <description>Driver class name for a JDBC metastore</description>
-</property>
-
-</configuration>
-
-EOF
-
ln -s /var/run/spark/work $PREFIX/$LIB_DIR/work
cp -r ${BUILD_DIR}/python ${PREFIX}/${INSTALLED_LIB_DIR}/
@@ -242,3 +198,14 @@ cp ${BUILD_DIR}/{LICENSE,NOTICE} ${PREFIX}/${LIB_DIR}/
# Version-less symlinks
(cd $PREFIX/$LIB_DIR/lib; ln -s spark-assembly*.jar spark-assembly.jar; ln -s spark-examples*.jar spark-examples.jar)
+pushd $PREFIX/$LIB_DIR/yarn/lib
+ln -s ../../lib/spark-*-yarn-shuffle.jar spark-yarn-shuffle.jar
+ln -s ../../lib/datanucleus-api-jdo*.jar datanucleus-api-jdo.jar
+ln -s ../../lib/datanucleus-core*.jar datanucleus-core.jar
+ln -s ../../lib/datanucleus-rdbms*.jar datanucleus-rdbms.jar
+popd
+pushd $PREFIX/$LIB_DIR/extras/lib
+for j in $(ls *.jar); do
+ ln -s $j $(echo $j | sed -n 's/\(.*\)\(_[0-9.]\+-[0-9.]\+\)\(.jar\)/\1\3/p')
+done
+popd
diff --git a/bigtop-packages/src/common/spark/patch0.diff b/bigtop-packages/src/common/spark/patch0.diff
new file mode 100644
index 00000000..6b7f350f
--- /dev/null
+++ b/bigtop-packages/src/common/spark/patch0.diff
@@ -0,0 +1,11 @@
+--- spark-1.5.1-patched/pom.xml 2015-09-23 22:50:32.000000000 -0700
++++ spark-1.5.1/pom.xml 2015-10-30 13:46:54.326426316 -0700
+@@ -2366,7 +2366,7 @@
+ <profile>
+ <id>hadoop-2.6</id>
+ <properties>
+- <hadoop.version>2.6.0</hadoop.version>
++ <hadoop.version>${hadoop.version}</hadoop.version>
+ <jets3t.version>0.9.3</jets3t.version>
+ <zookeeper.version>3.4.6</zookeeper.version>
+ <curator.version>2.6.0</curator.version>
diff --git a/bigtop-packages/src/common/spark/spark-env.sh b/bigtop-packages/src/common/spark/spark-env.sh
new file mode 100755
index 00000000..885aed13
--- /dev/null
+++ b/bigtop-packages/src/common/spark/spark-env.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+export SPARK_HOME=${SPARK_HOME:-/usr/lib/spark}
+export SPARK_LOG_DIR=${SPARK_LOG_DIR:-/var/log/spark}
+
+export HADOOP_HOME=${HADOOP_HOME:-/usr/lib/hadoop}
+export HADOOP_HDFS_HOME=${HADOOP_HDFS_HOME:-${HADOOP_HOME}/../hadoop-hdfs}
+export HADOOP_MAPRED_HOME=${HADOOP_MAPRED_HOME:-${HADOOP_HOME}/../hadoop-mapreduce}
+export HADOOP_YARN_HOME=${HADOOP_YARN_HOME:-${HADOOP_HOME}/../hadoop-yarn}
+export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/etc/hadoop/conf}
+
+# Let's run everything with JVM runtime, instead of Scala
+export SPARK_LAUNCH_WITH_SCALA=0
+export SPARK_LIBRARY_PATH=${SPARK_LIBRARY_PATH:-${SPARK_HOME}/lib}
+export SCALA_LIBRARY_PATH=${SCALA_LIBRARY_PATH:-${SPARK_HOME}/lib}
+
+# Let's make sure that all needed hadoop libs are added properly
+export CLASSPATH="$CLASSPATH:$HADOOP_HOME/*:$HADOOP_HDFS_HOME/*:$HADOOP_YARN_HOME/*:$HADOOP_MAPRED_HOME/*"
+export SPARK_LIBRARY_PATH=$SPARK_LIBRARY_PATH:${HADOOP_HOME}/lib/native
+
+export STANDALONE_SPARK_MASTER_HOST=`hostname -f`
+export SPARK_MASTER_PORT=7077
+export SPARK_MASTER_WEBUI_PORT=18080
+
+export SPARK_WORKER_DIR=${SPARK_WORKER_DIR:-/var/run/spark/work}
+export SPARK_WORKER_PORT=7078
+export SPARK_WORKER_WEBUI_PORT=18081
+
+export SPARK_HISTORY_OPTS="$SPARK_HISTORY_OPTS -Dspark.history.fs.logDirectory=hdfs:///var/log/spark/apps -Dspark.history.ui.port=18082"
diff --git a/bigtop-packages/src/deb/spark/control b/bigtop-packages/src/deb/spark/control
index 856d622e..7ebc9703 100644
--- a/bigtop-packages/src/deb/spark/control
+++ b/bigtop-packages/src/deb/spark/control
@@ -60,3 +60,18 @@ Architecture: all
Depends: spark-core (= ${source:Version})
Description: Thrift server for Spark SQL
Thrift server for Spark SQL
+
+Package: spark-datanucleus
+Architecture: all
+Description: DataNucleus libraries for Apache Spark
+ DataNucleus libraries used by Spark SQL with Hive Support
+
+Package: spark-extras
+Architecture: all
+Description: External/extra libraries for Apache Spark
+ External/extra libraries built for Apache Spark but not included in the main assembly JAR (e.g., external streaming libraries)
+
+Package: spark-yarn-shuffle
+Architecture: all
+Description: Spark YARN Shuffle Service
+ Spark YARN Shuffle Service
diff --git a/bigtop-packages/src/deb/spark/spark-core.install b/bigtop-packages/src/deb/spark/spark-core.install
index 7e0d9f7b..70f588c2 100644
--- a/bigtop-packages/src/deb/spark/spark-core.install
+++ b/bigtop-packages/src/deb/spark/spark-core.install
@@ -1,26 +1,26 @@
/etc/spark
-/usr/bin/spark-executor
+/usr/bin/spark-class
/usr/bin/spark-submit
/usr/bin/spark-shell
+/usr/bin/spark-sql
/usr/lib/spark/LICENSE
/usr/lib/spark/RELEASE
/usr/lib/spark/NOTICE
/usr/lib/spark/bin/beeline
-/usr/lib/spark/bin/compute-classpath.sh
/usr/lib/spark/bin/load-spark-env.sh
/usr/lib/spark/bin/run-example
/usr/lib/spark/bin/spark-class
/usr/lib/spark/bin/spark-shell
/usr/lib/spark/bin/spark-sql
/usr/lib/spark/bin/spark-submit
-/usr/lib/spark/bin/utils.sh
/usr/lib/spark/conf
+/usr/lib/spark/data
+/usr/lib/spark/examples
+/usr/lib/spark/lib/spark-assembly*.jar
+/usr/lib/spark/lib/spark-examples*.jar
/usr/lib/spark/sbin
/usr/lib/spark/work
-/usr/lib/spark/lib
-/usr/lib/spark/examples
-/usr/lib/spark/data
-/usr/share/doc/spark
+/usr/share/doc/spark*
/var/lib/spark/
/var/log/spark/
/var/run/spark/
diff --git a/bigtop-packages/src/deb/spark/spark-datanucleus.install b/bigtop-packages/src/deb/spark/spark-datanucleus.install
new file mode 100644
index 00000000..69f619b2
--- /dev/null
+++ b/bigtop-packages/src/deb/spark/spark-datanucleus.install
@@ -0,0 +1,2 @@
+/usr/lib/spark/lib/datanucleus-*.jar
+/usr/lib/spark/yarn/lib/datanucleus-*.jar
diff --git a/bigtop-packages/src/deb/spark/spark-extras.install b/bigtop-packages/src/deb/spark/spark-extras.install
new file mode 100644
index 00000000..c65fbc0f
--- /dev/null
+++ b/bigtop-packages/src/deb/spark/spark-extras.install
@@ -0,0 +1 @@
+/usr/lib/spark/extras/lib
diff --git a/bigtop-packages/src/deb/spark/spark-yarn-shuffle.install b/bigtop-packages/src/deb/spark/spark-yarn-shuffle.install
new file mode 100644
index 00000000..79c799a9
--- /dev/null
+++ b/bigtop-packages/src/deb/spark/spark-yarn-shuffle.install
@@ -0,0 +1,2 @@
+/usr/lib/spark/lib/spark-*-yarn-shuffle.jar
+/usr/lib/spark/yarn/lib/spark-yarn-shuffle.jar
diff --git a/bigtop-packages/src/rpm/spark/SPECS/spark.spec b/bigtop-packages/src/rpm/spark/SPECS/spark.spec
index d8a5c10c..daecc725 100644
--- a/bigtop-packages/src/rpm/spark/SPECS/spark.spec
+++ b/bigtop-packages/src/rpm/spark/SPECS/spark.spec
@@ -119,6 +119,28 @@ Requires: spark-core = %{version}-%{release}
%description -n spark-thriftserver
Thrift server for Spark SQL
+%package -n spark-datanucleus
+Summary: DataNucleus libraries for Apache Spark
+Group: Development/Libraries
+
+%description -n spark-datanucleus
+DataNucleus libraries used by Spark SQL with Hive Support
+
+%package -n spark-extras
+Summary: External/extra libraries for Apache Spark
+Group: Development/Libraries
+
+%description -n spark-extras
+External/extra libraries built for Apache Spark but not included in the main
+assembly JAR (e.g., external streaming libraries)
+
+%package -n spark-yarn-shuffle
+Summary: Spark YARN Shuffle Service
+Group: Development/Libraries
+
+%description -n spark-yarn-shuffle
+Spark YARN Shuffle Service
+
%prep
%setup -n %{spark_name}-%{spark_base_version}
@@ -175,6 +197,8 @@ done
%{lib_spark}/NOTICE
%{lib_spark}/bin
%{lib_spark}/lib
+%exclude %{lib_spark}/lib/datanucleus-*.jar
+%exclude %{lib_spark}/lib/spark-*-yarn-shuffle.jar
%{lib_spark}/sbin
%{lib_spark}/data
%{lib_spark}/examples
@@ -185,8 +209,9 @@ done
%attr(0755,spark,spark) %{var_lib_spark}
%attr(0755,spark,spark) %{var_run_spark}
%attr(0755,spark,spark) %{var_log_spark}
+%{bin}/spark-class
%{bin}/spark-shell
-%{bin}/spark-executor
+%{bin}/spark-sql
%{bin}/spark-submit
%files -n spark-python
@@ -195,6 +220,20 @@ done
%attr(0755,root,root) %{lib_spark}/bin/pyspark
%{lib_spark}/python
+%files -n spark-datanucleus
+%defattr(-,root,root,755)
+%{lib_spark}/lib/datanucleus-*.jar
+%{lib_spark}/yarn/lib/datanucleus-*.jar
+
+%files -n spark-extras
+%defattr(-,root,root,755)
+%{lib_spark}/extras
+
+%files -n spark-yarn-shuffle
+%defattr(-,root,root,755)
+%{lib_spark}/lib/spark-*-yarn-shuffle.jar
+%{lib_spark}/yarn/lib/spark-yarn-shuffle.jar
+
%define service_macro() \
%files -n %1 \
%attr(0755,root,root)/%{initd_dir}/%1 \
diff --git a/bigtop.bom b/bigtop.bom
index 51209f5f..57fd4573 100644
--- a/bigtop.bom
+++ b/bigtop.bom
@@ -282,7 +282,7 @@ bigtop {
name = 'spark'
pkg = 'spark-core'
relNotes = 'Apache Spark'
- version { base = '1.3.1'; pkg = base; release = 1 }
+ version { base = '1.5.1'; pkg = base; release = 1 }
tarball { destination = "$name-${version.base}.tar.gz"
source = "$name-${version.base}.tgz" }
url { download_path = "/$name/$name-${version.base}"