diff options
author: Konstantinos Tsakalozos <konstantinos.tsakalozos@canonical.com> 2016-08-26 14:48:27 +0300
committer: Kevin W Monroe <kevin.monroe@canonical.com> 2016-10-07 18:14:30 -0500
commit: b033f5c16dee5584137ed17a17b4a2f6de9e98bf (patch)
parent: e66cc32f0e8cd10444f9d426dc9d14c911104ac5 (diff)
BIGTOP-2522: Add spark processing juju bundle (fixes #140)
Signed-off-by: Kevin W Monroe <kevin.monroe@canonical.com>
7 files changed, 516 insertions, 0 deletions
diff --git a/bigtop-deploy/juju/spark-processing/README.md b/bigtop-deploy/juju/spark-processing/README.md
new file mode 100644
index 00000000..335566b5
--- /dev/null
+++ b/bigtop-deploy/juju/spark-processing/README.md
@@ -0,0 +1,217 @@
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+# Overview
+This bundle provides a complete deployment of
+[Apache Spark](https://spark.apache.org/) in standalone HA mode as provided
+by [Apache Bigtop](http://bigtop.apache.org/). Ganglia and rsyslog
+applications are included to monitor cluster health and syslog activity.
+## Bundle Composition
+The applications that comprise this bundle are spread across 7 units as
+follows:
+ * Spark (Master and Worker)
+ * 3 separate units
+ * Zookeeper
+ * 3 separate units
+ * Ganglia (Web interface for monitoring cluster metrics)
+ * Rsyslog (Aggregate cluster syslog events in a single location)
+ * Colocated on the Ganglia unit
+Deploying this bundle results in a fully configured Apache Bigtop Spark
+cluster on any supported cloud, which can be easily scaled to meet workload
+demands.
+# Deploying
+A working Juju installation is assumed to be present. If Juju is not yet set
+up, please follow the
+[getting started](https://jujucharms.com/docs/2.0/getting-started)
+instructions prior to deploying this bundle.
+Once ready, deploy this bundle with the `juju deploy` command:
+ juju deploy spark-processing
+> **Note**: The above assumes Juju 2.0 or greater. If using an earlier version
+of Juju, use [juju-quickstart](https://launchpad.net/juju-quickstart) with the
+following syntax: `juju quickstart spark-processing`.
+The charms in this bundle can also be built from their source layers in the
+[Bigtop charm repository][]. See the [Bigtop charm README][] for instructions
+on building and deploying these charms locally.
+[Bigtop charm repository]: https://github.com/apache/bigtop/tree/master/bigtop-packages/src/charm
+[Bigtop charm README]: https://github.com/apache/bigtop/blob/master/bigtop-packages/src/charm/README.md
+# Verifying
+## Status
+The applications that make up this bundle provide status messages to
+indicate when they are ready:
+ juju status
+This is particularly useful when combined with `watch` to track the on-going
+progress of the deployment:
+ watch -n 0.5 juju status
+The message for each unit will provide information about that unit's state.
+Once they all indicate that they are ready, perform application smoke tests
+to verify that the bundle is working as expected.
+## Smoke Test
+The spark charm provides a `smoke-test` action that can be used to verify the
+application is functioning as expected. Run it as follows:
+ juju run-action spark/0 smoke-test
+> **Note**: The above assumes Juju 2.0 or greater. If using an earlier version
+of Juju, the syntax is `juju action do spark/0 smoke-test`.
+You can watch the progress of the smoke test action with:
+ watch -n 0.5 juju show-action-status
+> **Note**: The above assumes Juju 2.0 or greater. If using an earlier version
+of Juju, the syntax is `juju action status`.
+Eventually, the smoke test should settle to `status: completed`. If
+it reports `status: failed`, Spark is not working as expected. Get
+more information about the smoke-test action with:
+ juju show-action-output <action-id>
+> **Note**: The above assumes Juju 2.0 or greater. If using an earlier version
+of Juju, the syntax is `juju action fetch <action-id>`.
+# Monitoring
+This bundle includes Ganglia for system-level monitoring of the spark units.
+Metrics are sent to a centralized ganglia unit for easy viewing in a browser.
+To view the ganglia web interface, first expose the service:
+ juju expose ganglia
+Now find the ganglia public IP address:
+ juju status ganglia
+The ganglia web interface will be available at:
+ http://GANGLIA_PUBLIC_IP/ganglia
+# Logging
+This bundle includes rsyslog to collect syslog data from the spark units. These
+logs are sent to a centralized rsyslog unit for easy syslog analysis. One
+method of viewing this log data is to simply cat syslog from the rsyslog unit:
+ juju run --unit rsyslog/0 'sudo cat /var/log/syslog'
+Logs may also be forwarded to an external rsyslog processing service. See
+the *Forwarding logs to a system outside of the Juju environment* section of
+the [rsyslog README](https://jujucharms.com/rsyslog/) for more information.
+# Benchmarking
+The `spark` charm in this bundle provides several benchmarks to gauge
+the performance of the Spark cluster. Each benchmark is an action that can be
+run with `juju run-action`:
+ $ juju actions spark | grep Bench
+ logisticregression Run the Spark Bench LogisticRegression benchmark.
+ matrixfactorization Run the Spark Bench MatrixFactorization benchmark.
+ pagerank Run the Spark Bench PageRank benchmark.
+ sql Run the Spark Bench SQL benchmark.
+ streaming Run the Spark Bench Streaming benchmark.
+ svdplusplus Run the Spark Bench SVDPlusPlus benchmark.
+ svm Run the Spark Bench SVM benchmark.
+ trianglecount Run the Spark Bench TriangleCount benchmark.
+ $ juju run-action spark/0 pagerank
+ Action queued with id: 339cec1f-e903-4ee7-85ca-876fb0c3d28e
+ $ juju show-action-output 339cec1f-e903-4ee7-85ca-876fb0c3d28e
+ results:
+ meta:
+ composite:
+ direction: asc
+ units: secs
+ value: ".982000"
+ raw: |
+ PageRank,0,.982000,,,,PageRank-MLlibConfig,,,,,10,12,,200000,4.0,1.3,0.15
+ start: 2016-09-22T21:52:26Z
+ stop: 2016-09-22T21:52:33Z
+ results:
+ duration:
+ direction: asc
+ units: secs
+ value: ".982000"
+ throughput:
+ direction: desc
+ units: x/sec
+ value: ""
+ status: completed
+ timing:
+ completed: 2016-09-22 21:52:36 +0000 UTC
+ enqueued: 2016-09-22 21:52:09 +0000 UTC
+ started: 2016-09-22 21:52:13 +0000 UTC
+# Scaling
+By default, three spark units are deployed. To increase the number of spark
+workers, simply add more units. To add one unit:
+ juju add-unit spark
+Multiple units may be added at once. For example, add four more spark units:
+ juju add-unit -n4 spark
+# Network-Restricted Environments
+Charms can be deployed in environments with limited network access. To deploy
+in this environment, configure a Juju model with appropriate
+proxy and/or mirror options. See
+[Configuring Models](https://jujucharms.com/docs/2.0/models-config) for more
+information.
+# Contact Information
+- <bigdata@lists.ubuntu.com>
+# Resources
+- [Apache Bigtop](http://bigtop.apache.org/) home page
+- [Apache Bigtop issue tracking](http://bigtop.apache.org/issue-tracking.html)
+- [Apache Bigtop mailing lists](http://bigtop.apache.org/mail-lists.html)
+- [Juju Bigtop charms](https://jujucharms.com/q/apache/bigtop)
+- [Juju mailing list](https://lists.ubuntu.com/mailman/listinfo/juju)
+- [Juju community](https://jujucharms.com/community)
diff --git a/bigtop-deploy/juju/spark-processing/bundle-dev.yaml b/bigtop-deploy/juju/spark-processing/bundle-dev.yaml
new file mode 100644
index 00000000..aaaf5143
--- /dev/null
+++ b/bigtop-deploy/juju/spark-processing/bundle-dev.yaml
@@ -0,0 +1,76 @@
+services:
+ spark:
+ charm: "cs:~bigdata-dev/xenial/spark"
+ num_units: 3
+ annotations:
+ gui-x: "500"
+ gui-y: "0"
+ to:
+ - "1"
+ - "2"
+ - "3"
+ zookeeper:
+ charm: "cs:~charmers/trusty/zookeeper-1"
+ num_units: 3
+ annotations:
+ gui-x: "500"
+ gui-y: "400"
+ to:
+ - "4"
+ - "5"
+ - "6"
+ ganglia:
+ charm: "cs:trusty/ganglia-2"
+ num_units: 1
+ annotations:
+ gui-x: "0"
+ gui-y: "800"
+ to:
+ - "7"
+ ganglia-node:
+ charm: "cs:~bigdata-dev/xenial/ganglia-node-2"
+ annotations:
+ gui-x: "250"
+ gui-y: "400"
+ rsyslog:
+ charm: "cs:trusty/rsyslog-10"
+ num_units: 1
+ annotations:
+ gui-x: "1000"
+ gui-y: "800"
+ to:
+ - "7"
+ rsyslog-forwarder-ha:
+ charm: "cs:~bigdata-dev/xenial/rsyslog-forwarder-ha-2"
+ annotations:
+ gui-x: "750"
+ gui-y: "400"
+series: xenial
+relations:
+ - [spark, zookeeper]
+ - ["ganglia-node:juju-info", "spark:juju-info"]
+ - ["ganglia:node", "ganglia-node:node"]
+ - ["rsyslog-forwarder-ha:juju-info", "spark:juju-info"]
+ - ["rsyslog:aggregator", "rsyslog-forwarder-ha:syslog"]
+machines:
+ "1":
+ constraints: "mem=7G"
+ series: "xenial"
+ "2":
+ constraints: "mem=7G"
+ series: "xenial"
+ "3":
+ constraints: "mem=7G"
+ series: "xenial"
+ "4":
+ constraints: "mem=3G"
+ series: "trusty"
+ "5":
+ constraints: "mem=3G"
+ series: "trusty"
+ "6":
+ constraints: "mem=3G"
+ series: "trusty"
+ "7":
+ constraints: "mem=3G"
+ series: "trusty"
diff --git a/bigtop-deploy/juju/spark-processing/bundle-local.yaml b/bigtop-deploy/juju/spark-processing/bundle-local.yaml
new file mode 100644
index 00000000..b8a4d312
--- /dev/null
+++ b/bigtop-deploy/juju/spark-processing/bundle-local.yaml
@@ -0,0 +1,76 @@
+services:
+ spark:
+ charm: "/home/ubuntu/charms/xenial/spark"
+ num_units: 3
+ annotations:
+ gui-x: "500"
+ gui-y: "0"
+ to:
+ - "1"
+ - "2"
+ - "3"
+ zookeeper:
+ charm: "cs:~charmers/trusty/zookeeper-1"
+ num_units: 3
+ annotations:
+ gui-x: "500"
+ gui-y: "400"
+ to:
+ - "4"
+ - "5"
+ - "6"
+ ganglia:
+ charm: "cs:trusty/ganglia-2"
+ num_units: 1
+ annotations:
+ gui-x: "0"
+ gui-y: "800"
+ to:
+ - "7"
+ ganglia-node:
+ charm: "cs:~bigdata-dev/xenial/ganglia-node-2"
+ annotations:
+ gui-x: "250"
+ gui-y: "400"
+ rsyslog:
+ charm: "cs:trusty/rsyslog-10"
+ num_units: 1
+ annotations:
+ gui-x: "1000"
+ gui-y: "800"
+ to:
+ - "7"
+ rsyslog-forwarder-ha:
+ charm: "cs:~bigdata-dev/xenial/rsyslog-forwarder-ha-2"
+ annotations:
+ gui-x: "750"
+ gui-y: "400"
+series: xenial
+relations:
+ - [spark, zookeeper]
+ - ["ganglia-node:juju-info", "spark:juju-info"]
+ - ["ganglia:node", "ganglia-node:node"]
+ - ["rsyslog-forwarder-ha:juju-info", "spark:juju-info"]
+ - ["rsyslog:aggregator", "rsyslog-forwarder-ha:syslog"]
+machines:
+ "1":
+ constraints: "mem=7G"
+ series: "xenial"
+ "2":
+ constraints: "mem=7G"
+ series: "xenial"
+ "3":
+ constraints: "mem=7G"
+ series: "xenial"
+ "4":
+ constraints: "mem=3G"
+ series: "trusty"
+ "5":
+ constraints: "mem=3G"
+ series: "trusty"
+ "6":
+ constraints: "mem=3G"
+ series: "trusty"
+ "7":
+ constraints: "mem=3G"
+ series: "trusty"
diff --git a/bigtop-deploy/juju/spark-processing/bundle.yaml b/bigtop-deploy/juju/spark-processing/bundle.yaml
new file mode 100644
index 00000000..d36ed434
--- /dev/null
+++ b/bigtop-deploy/juju/spark-processing/bundle.yaml
@@ -0,0 +1,76 @@
+services:
+ spark:
+ charm: "cs:xenial/spark-2"
+ num_units: 3
+ annotations:
+ gui-x: "500"
+ gui-y: "0"
+ to:
+ - "1"
+ - "2"
+ - "3"
+ zookeeper:
+ charm: "cs:~charmers/trusty/zookeeper-1"
+ num_units: 3
+ annotations:
+ gui-x: "500"
+ gui-y: "400"
+ to:
+ - "4"
+ - "5"
+ - "6"
+ ganglia:
+ charm: "cs:trusty/ganglia-2"
+ num_units: 1
+ annotations:
+ gui-x: "0"
+ gui-y: "800"
+ to:
+ - "7"
+ ganglia-node:
+ charm: "cs:~bigdata-dev/xenial/ganglia-node-2"
+ annotations:
+ gui-x: "250"
+ gui-y: "400"
+ rsyslog:
+ charm: "cs:trusty/rsyslog-10"
+ num_units: 1
+ annotations:
+ gui-x: "1000"
+ gui-y: "800"
+ to:
+ - "7"
+ rsyslog-forwarder-ha:
+ charm: "cs:~bigdata-dev/xenial/rsyslog-forwarder-ha-2"
+ annotations:
+ gui-x: "750"
+ gui-y: "400"
+series: xenial
+relations:
+ - [spark, zookeeper]
+ - ["ganglia-node:juju-info", "spark:juju-info"]
+ - ["ganglia:node", "ganglia-node:node"]
+ - ["rsyslog-forwarder-ha:juju-info", "spark:juju-info"]
+ - ["rsyslog:aggregator", "rsyslog-forwarder-ha:syslog"]
+machines:
+ "1":
+ constraints: "mem=7G"
+ series: "xenial"
+ "2":
+ constraints: "mem=7G"
+ series: "xenial"
+ "3":
+ constraints: "mem=7G"
+ series: "xenial"
+ "4":
+ constraints: "mem=3G"
+ series: "trusty"
+ "5":
+ constraints: "mem=3G"
+ series: "trusty"
+ "6":
+ constraints: "mem=3G"
+ series: "trusty"
+ "7":
+ constraints: "mem=3G"
+ series: "trusty"
diff --git a/bigtop-deploy/juju/spark-processing/copyright b/bigtop-deploy/juju/spark-processing/copyright
new file mode 100644
index 00000000..e900b97c
--- /dev/null
+++ b/bigtop-deploy/juju/spark-processing/copyright
@@ -0,0 +1,16 @@
+Format: http://dep.debian.net/deps/dep5/
+Files: *
+Copyright: Copyright 2015, Canonical Ltd., All Rights Reserved.
+License: Apache License 2.0
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ .
+ http://www.apache.org/licenses/LICENSE-2.0
+ .
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/bigtop-deploy/juju/spark-processing/tests/01-bundle.py b/bigtop-deploy/juju/spark-processing/tests/01-bundle.py
new file mode 100755
index 00000000..379778c9
--- /dev/null
+++ b/bigtop-deploy/juju/spark-processing/tests/01-bundle.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import unittest
+import amulet
+import yaml
+class TestBundle(unittest.TestCase):
+ bundle_file = os.path.join(os.path.dirname(__file__), '..', 'bundle.yaml')
+ @classmethod
+ def setUpClass(cls):
+ cls.d = amulet.Deployment(series='xenial')
+ with open(cls.bundle_file) as f:
+ bun = f.read()
+ bundle = yaml.safe_load(bun)
+ cls.d.load(bundle)
+ cls.d.setup(timeout=1800)
+ cls.d.sentry.wait_for_messages({'spark': 'ready (standalone - HA)'}, timeout=1800)
+ cls.spark = cls.d.sentry['spark'][0]
+ def test_components(self):
+ """
+ Confirm that all of the required components are up and running.
+ """
+ spark, retcode = self.spark.run("pgrep -a java")
+ assert 'spark' in spark, 'Spark should be running on spark'
+ def test_spark(self):
+ output, retcode = self.spark.run("su ubuntu -c 'bash -lc /home/ubuntu/sparkpi.sh 2>&1'")
+ assert 'Pi is roughly' in output, 'SparkPI test failed: %s' % output
+if __name__ == '__main__':
+ unittest.main()
diff --git a/bigtop-deploy/juju/spark-processing/tests/tests.yaml b/bigtop-deploy/juju/spark-processing/tests/tests.yaml
new file mode 100644
index 00000000..1ec8b824
--- /dev/null
+++ b/bigtop-deploy/juju/spark-processing/tests/tests.yaml
@@ -0,0 +1,2 @@
+packages:
+ - amulet