diff options
author | Jonathan Kelly <jonathak@amazon.com> | 2015-10-02 15:13:18 -0700 |
---|---|---|
committer | Konstantin Boudnik <cos@apache.org> | 2015-11-02 13:02:28 -0800 |
commit | 05d9fada278f23c97079afb2429b590afa100b1e (patch) | |
tree | 5b7c513e3a193d16df6ddbb5824565b9fa591e59 | |
parent | bf67f6e5bf72934c577f2eeaabe063aff3a9ca99 (diff) |
BIGTOP-2104. Packages upgrade to Spark 1.5.1
-rw-r--r-- | bigtop-packages/src/common/spark/do-component-build | 15 | ||||
-rw-r--r-- | bigtop-packages/src/common/spark/install_spark.sh | 79 | ||||
-rw-r--r-- | bigtop-packages/src/common/spark/patch0.diff | 11 | ||||
-rwxr-xr-x | bigtop-packages/src/common/spark/spark-env.sh | 43 | ||||
-rw-r--r-- | bigtop-packages/src/deb/spark/control | 15 | ||||
-rw-r--r-- | bigtop-packages/src/deb/spark/spark-core.install | 14 | ||||
-rw-r--r-- | bigtop-packages/src/deb/spark/spark-datanucleus.install | 2 | ||||
-rw-r--r-- | bigtop-packages/src/deb/spark/spark-extras.install | 1 | ||||
-rw-r--r-- | bigtop-packages/src/deb/spark/spark-yarn-shuffle.install | 2 | ||||
-rw-r--r-- | bigtop-packages/src/rpm/spark/SPECS/spark.spec | 41 | ||||
-rw-r--r-- | bigtop.bom | 2 |
11 files changed, 158 insertions, 67 deletions
diff --git a/bigtop-packages/src/common/spark/do-component-build b/bigtop-packages/src/common/spark/do-component-build index 3d705a54..8d74f4a4 100644 --- a/bigtop-packages/src/common/spark/do-component-build +++ b/bigtop-packages/src/common/spark/do-component-build @@ -24,9 +24,20 @@ BUILD_OPTS="-Divy.home=${HOME}/.ivy2 -Dsbt.ivy.home=${HOME}/.ivy2 -Duser.home=${ -Dhadoop.version=$HADOOP_VERSION \ -Dyarn.version=$HADOOP_VERSION \ -Dprotobuf.version=2.5.0 \ - -DskipTests -DrecompileMode=all" + -DrecompileMode=all \ + -Pbigtop-dist \ + -Pyarn -Phadoop-2.6 \ + -Phive -Phive-thriftserver \ + $SPARK_BUILD_OPTS" + ## this might be an issue at times # http://maven.40175.n5.nabble.com/Not-finding-artifact-in-local-repo-td3727753.html export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m -XX:PermSize=1024m -XX:MaxPermSize=1024m" -mvn -Pbigtop-dist -Pyarn -Phive -Phive-thriftserver $BUILD_OPTS install +mvn $BUILD_OPTS -DskipTests install + +# Tests must be run after Spark has already been packaged. +# See http://spark.apache.org/docs/latest/building-spark.html#spark-tests-in-maven +if [ "$SPARK_RUN_TESTS" = "true" ]; then + mvn $BUILD_OPTS test +fi diff --git a/bigtop-packages/src/common/spark/install_spark.sh b/bigtop-packages/src/common/spark/install_spark.sh index 85078aea..530b3559 100644 --- a/bigtop-packages/src/common/spark/install_spark.sh +++ b/bigtop-packages/src/common/spark/install_spark.sh @@ -119,6 +119,10 @@ install -d -m 0755 $PREFIX/$LIB_DIR install -d -m 0755 $PREFIX/$LIB_DIR/lib install -d -m 0755 $PREFIX/$LIB_DIR/bin install -d -m 0755 $PREFIX/$LIB_DIR/sbin +install -d -m 0755 $PREFIX/$LIB_DIR/extras +install -d -m 0755 $PREFIX/$LIB_DIR/extras/lib +install -d -m 0755 $PREFIX/$LIB_DIR/yarn +install -d -m 0755 $PREFIX/$LIB_DIR/yarn/lib install -d -m 0755 $PREFIX/$DOC_DIR install -d -m 0755 $PREFIX/$EXAMPLES_DIR @@ -134,9 +138,15 @@ tar --wildcards -C $PREFIX/$LIB_DIR/ -zxf ${BUILD_DIR}/assembly/target/spark-ass rm -rf $PREFIX/$LIB_DIR/bin/*.cmd +# External/extra jars +ls ${BUILD_DIR}/{external,extras}/*/target/*${SPARK_VERSION}.jar | grep -v 'original-\|assembly' | xargs -IJARS cp JARS $PREFIX/$LIB_DIR/extras/lib + # Examples jar cp ${BUILD_DIR}/examples/target/spark-examples*${SPARK_VERSION}.jar $PREFIX/$LIB_DIR/lib/spark-examples-${SPARK_VERSION}-hadoop${HADOOP_VERSION}.jar +# Spark YARN Shuffle jar +cp ${BUILD_DIR}/network/yarn/target/*/spark-${SPARK_VERSION}-yarn-shuffle.jar $PREFIX/$LIB_DIR/lib/ + # Examples src cp -ra ${BUILD_DIR}/examples/src $PREFIX/$EXAMPLES_DIR/ ln -s $EXAMPLES_DIR $PREFIX/$LIB_DIR/examples @@ -150,12 +160,12 @@ chmod 755 $PREFIX/$LIB_DIR/sbin/* # Copy in the configuration files install -d -m 0755 $PREFIX/$CONF_DIR cp -a ${BUILD_DIR}/conf/* $PREFIX/$CONF_DIR -cp $PREFIX/$CONF_DIR/spark-env.sh.template $PREFIX/$CONF_DIR/spark-env.sh +cp $SOURCE_DIR/spark-env.sh $PREFIX/$CONF_DIR ln -s /etc/spark/conf $PREFIX/$LIB_DIR/conf # Copy in the wrappers install -d -m 0755 $PREFIX/$BIN_DIR -for wrap in sbin/spark-executor bin/spark-shell bin/spark-submit; do +for wrap in bin/spark-class bin/spark-shell bin/spark-sql bin/spark-submit; do cat > $PREFIX/$BIN_DIR/`basename $wrap` <<EOF #!/bin/bash @@ -167,60 +177,6 @@ EOF chmod 755 $PREFIX/$BIN_DIR/`basename $wrap` done -cat >> $PREFIX/$CONF_DIR/spark-env.sh <<EOF -export HADOOP_HOME=\${HADOOP_HOME:-/usr/lib/hadoop} -export HADOOP_CONF_DIR=\${HADOOP_CONF_DIR:-/etc/hadoop/conf} - -export SPARK_MASTER_IP=\`hostname\` -export SPARK_MASTER_PORT=7077 -export SPARK_MASTER_WEBUI_PORT=18080 -export SPARK_WORKER_PORT=7078 -export SPARK_WORKER_WEBUI_PORT=18081 -export SPARK_WORKER_DIR=/var/run/spark/work -export SPARK_HISTORY_OPTS="\$SPARK_HISTORY_OPTS -Dspark.history.fs.logDirectory=hdfs:///var/log/spark/apps -Dspark.history.ui.port=18082" - -export SPARK_LOG_DIR=/var/log/spark - -export LD_LIBRARY_PATH=\${LD_LIBRARY_PATH}:\${HADOOP_HOME}/lib/native -EOF - -cat >> $PREFIX/$CONF_DIR/hive-site.xml <<EOF -<?xml version="1.0"?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> -<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> -<configuration> - -<property> - <name>javax.jdo.option.ConnectionURL</name> - <value>jdbc:derby:;databaseName=/tmp/spark-\${user.name}/metastore_db;create=true</value> - <description>JDBC connect string for a JDBC metastore</description> -</property> - -<property> - <name>javax.jdo.option.ConnectionDriverName</name> - <value>org.apache.derby.jdbc.EmbeddedDriver</value> - <description>Driver class name for a JDBC metastore</description> -</property> - -</configuration> - -EOF - ln -s /var/run/spark/work $PREFIX/$LIB_DIR/work cp -r ${BUILD_DIR}/python ${PREFIX}/${INSTALLED_LIB_DIR}/ @@ -242,3 +198,14 @@ cp ${BUILD_DIR}/{LICENSE,NOTICE} ${PREFIX}/${LIB_DIR}/ # Version-less symlinks (cd $PREFIX/$LIB_DIR/lib; ln -s spark-assembly*.jar spark-assembly.jar; ln -s spark-examples*.jar spark-examples.jar) +pushd $PREFIX/$LIB_DIR/yarn/lib +ln -s ../../lib/spark-*-yarn-shuffle.jar spark-yarn-shuffle.jar +ln -s ../../lib/datanucleus-api-jdo*.jar datanucleus-api-jdo.jar +ln -s ../../lib/datanucleus-core*.jar datanucleus-core.jar +ln -s ../../lib/datanucleus-rdbms*.jar datanucleus-rdbms.jar +popd +pushd $PREFIX/$LIB_DIR/extras/lib +for j in $(ls *.jar); do + ln -s $j $(echo $j | sed -n 's/\(.*\)\(_[0-9.]\+-[0-9.]\+\)\(.jar\)/\1\3/p') +done +popd diff --git a/bigtop-packages/src/common/spark/patch0.diff b/bigtop-packages/src/common/spark/patch0.diff new file mode 100644 index 00000000..6b7f350f --- /dev/null +++ b/bigtop-packages/src/common/spark/patch0.diff @@ -0,0 +1,11 @@ +--- spark-1.5.1-patched/pom.xml 2015-09-23 22:50:32.000000000 -0700 ++++ spark-1.5.1/pom.xml 2015-10-30 13:46:54.326426316 -0700 +@@ -2366,7 +2366,7 @@ + <profile> + <id>hadoop-2.6</id> + <properties> +- <hadoop.version>2.6.0</hadoop.version> ++ <hadoop.version>${hadoop.version}</hadoop.version> + <jets3t.version>0.9.3</jets3t.version> + <zookeeper.version>3.4.6</zookeeper.version> + <curator.version>2.6.0</curator.version> diff --git a/bigtop-packages/src/common/spark/spark-env.sh b/bigtop-packages/src/common/spark/spark-env.sh new file mode 100755 index 00000000..885aed13 --- /dev/null +++ b/bigtop-packages/src/common/spark/spark-env.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +export SPARK_HOME=${SPARK_HOME:-/usr/lib/spark} +export SPARK_LOG_DIR=${SPARK_LOG_DIR:-/var/log/spark} + +export HADOOP_HOME=${HADOOP_HOME:-/usr/lib/hadoop} +export HADOOP_HDFS_HOME=${HADOOP_HDFS_HOME:-${HADOOP_HOME}/../hadoop-hdfs} +export HADOOP_MAPRED_HOME=${HADOOP_MAPRED_HOME:-${HADOOP_HOME}/../hadoop-mapreduce} +export HADOOP_YARN_HOME=${HADOOP_YARN_HOME:-${HADOOP_HOME}/../hadoop-yarn} +export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/etc/hadoop/conf} + +# Let's run everything with JVM runtime, instead of Scala +export SPARK_LAUNCH_WITH_SCALA=0 +export SPARK_LIBRARY_PATH=${SPARK_LIBRARY_PATH:-${SPARK_HOME}/lib} +export SCALA_LIBRARY_PATH=${SCALA_LIBRARY_PATH:-${SPARK_HOME}/lib} + +# Let's make sure that all needed hadoop libs are added properly +export CLASSPATH="$CLASSPATH:$HADOOP_HOME/*:$HADOOP_HDFS_HOME/*:$HADOOP_YARN_HOME/*:$HADOOP_MAPRED_HOME/*" +export SPARK_LIBRARY_PATH=$SPARK_LIBRARY_PATH:${HADOOP_HOME}/lib/native + +export STANDALONE_SPARK_MASTER_HOST=`hostname -f` +export SPARK_MASTER_PORT=7077 +export SPARK_MASTER_WEBUI_PORT=18080 + +export SPARK_WORKER_DIR=${SPARK_WORKER_DIR:-/var/run/spark/work} +export SPARK_WORKER_PORT=7078 +export SPARK_WORKER_WEBUI_PORT=18081 + +export SPARK_HISTORY_OPTS="$SPARK_HISTORY_OPTS -Dspark.history.fs.logDirectory=hdfs:///var/log/spark/apps -Dspark.history.ui.port=18082" diff --git a/bigtop-packages/src/deb/spark/control b/bigtop-packages/src/deb/spark/control index 856d622e..7ebc9703 100644 --- a/bigtop-packages/src/deb/spark/control +++ b/bigtop-packages/src/deb/spark/control @@ -60,3 +60,18 @@ Architecture: all Depends: spark-core (= ${source:Version}) Description: Thrift server for Spark SQL Thrift server for Spark SQL + +Package: spark-datanucleus +Architecture: all +Description: DataNucleus libraries for Apache Spark + DataNucleus libraries used by Spark SQL with Hive Support + +Package: spark-extras +Architecture: all +Description: External/extra libraries for Apache Spark + External/extra libraries built for Apache Spark but not included in the main assembly JAR (e.g., external streaming libraries) + +Package: spark-yarn-shuffle +Architecture: all +Description: Spark YARN Shuffle Service + Spark YARN Shuffle Service diff --git a/bigtop-packages/src/deb/spark/spark-core.install b/bigtop-packages/src/deb/spark/spark-core.install index 7e0d9f7b..70f588c2 100644 --- a/bigtop-packages/src/deb/spark/spark-core.install +++ b/bigtop-packages/src/deb/spark/spark-core.install @@ -1,26 +1,26 @@ /etc/spark -/usr/bin/spark-executor +/usr/bin/spark-class /usr/bin/spark-submit /usr/bin/spark-shell +/usr/bin/spark-sql /usr/lib/spark/LICENSE /usr/lib/spark/RELEASE /usr/lib/spark/NOTICE /usr/lib/spark/bin/beeline -/usr/lib/spark/bin/compute-classpath.sh /usr/lib/spark/bin/load-spark-env.sh /usr/lib/spark/bin/run-example /usr/lib/spark/bin/spark-class /usr/lib/spark/bin/spark-shell /usr/lib/spark/bin/spark-sql /usr/lib/spark/bin/spark-submit -/usr/lib/spark/bin/utils.sh /usr/lib/spark/conf +/usr/lib/spark/data +/usr/lib/spark/examples +/usr/lib/spark/lib/spark-assembly*.jar +/usr/lib/spark/lib/spark-examples*.jar /usr/lib/spark/sbin /usr/lib/spark/work -/usr/lib/spark/lib -/usr/lib/spark/examples -/usr/lib/spark/data -/usr/share/doc/spark +/usr/share/doc/spark* /var/lib/spark/ /var/log/spark/ /var/run/spark/ diff --git a/bigtop-packages/src/deb/spark/spark-datanucleus.install b/bigtop-packages/src/deb/spark/spark-datanucleus.install new file mode 100644 index 00000000..69f619b2 --- /dev/null +++ b/bigtop-packages/src/deb/spark/spark-datanucleus.install @@ -0,0 +1,2 @@ +/usr/lib/spark/lib/datanucleus-*.jar +/usr/lib/spark/yarn/lib/datanucleus-*.jar diff --git a/bigtop-packages/src/deb/spark/spark-extras.install b/bigtop-packages/src/deb/spark/spark-extras.install new file mode 100644 index 00000000..c65fbc0f --- /dev/null +++ b/bigtop-packages/src/deb/spark/spark-extras.install @@ -0,0 +1 @@ +/usr/lib/spark/extras/lib diff --git a/bigtop-packages/src/deb/spark/spark-yarn-shuffle.install b/bigtop-packages/src/deb/spark/spark-yarn-shuffle.install new file mode 100644 index 00000000..79c799a9 --- /dev/null +++ b/bigtop-packages/src/deb/spark/spark-yarn-shuffle.install @@ -0,0 +1,2 @@ +/usr/lib/spark/lib/spark-*-yarn-shuffle.jar +/usr/lib/spark/yarn/lib/spark-yarn-shuffle.jar diff --git a/bigtop-packages/src/rpm/spark/SPECS/spark.spec b/bigtop-packages/src/rpm/spark/SPECS/spark.spec index d8a5c10c..daecc725 100644 --- a/bigtop-packages/src/rpm/spark/SPECS/spark.spec +++ b/bigtop-packages/src/rpm/spark/SPECS/spark.spec @@ -119,6 +119,28 @@ Requires: spark-core = %{version}-%{release} %description -n spark-thriftserver Thrift server for Spark SQL +%package -n spark-datanucleus +Summary: DataNucleus libraries for Apache Spark +Group: Development/Libraries + +%description -n spark-datanucleus +DataNucleus libraries used by Spark SQL with Hive Support + +%package -n spark-extras +Summary: External/extra libraries for Apache Spark +Group: Development/Libraries + +%description -n spark-extras +External/extra libraries built for Apache Spark but not included in the main +assembly JAR (e.g., external streaming libraries) + +%package -n spark-yarn-shuffle +Summary: Spark YARN Shuffle Service +Group: Development/Libraries + +%description -n spark-yarn-shuffle +Spark YARN Shuffle Service + %prep %setup -n %{spark_name}-%{spark_base_version} @@ -175,6 +197,8 @@ done %{lib_spark}/NOTICE %{lib_spark}/bin %{lib_spark}/lib +%exclude %{lib_spark}/lib/datanucleus-*.jar +%exclude %{lib_spark}/lib/spark-*-yarn-shuffle.jar %{lib_spark}/sbin %{lib_spark}/data %{lib_spark}/examples @@ -185,8 +209,9 @@ done %attr(0755,spark,spark) %{var_lib_spark} %attr(0755,spark,spark) %{var_run_spark} %attr(0755,spark,spark) %{var_log_spark} +%{bin}/spark-class %{bin}/spark-shell -%{bin}/spark-executor +%{bin}/spark-sql %{bin}/spark-submit %files -n spark-python @@ -195,6 +220,20 @@ done %attr(0755,root,root) %{lib_spark}/bin/pyspark %{lib_spark}/python +%files -n spark-datanucleus +%defattr(-,root,root,755) +%{lib_spark}/lib/datanucleus-*.jar +%{lib_spark}/yarn/lib/datanucleus-*.jar + +%files -n spark-extras +%defattr(-,root,root,755) +%{lib_spark}/extras + +%files -n spark-yarn-shuffle +%defattr(-,root,root,755) +%{lib_spark}/lib/spark-*-yarn-shuffle.jar +%{lib_spark}/yarn/lib/spark-yarn-shuffle.jar + %define service_macro() \ %files -n %1 \ %attr(0755,root,root)/%{initd_dir}/%1 \ @@ -282,7 +282,7 @@ bigtop { name = 'spark' pkg = 'spark-core' relNotes = 'Apache Spark' - version { base = '1.3.1'; pkg = base; release = 1 } + version { base = '1.5.1'; pkg = base; release = 1 } tarball { destination = "$name-${version.base}.tar.gz" source = "$name-${version.base}.tgz" } url { download_path = "/$name/$name-${version.base}" |