diff options
Diffstat (limited to 'bigtop-packages/src/charm/zeppelin/layer-zeppelin')
8 files changed, 186 insertions, 78 deletions
diff --git a/bigtop-packages/src/charm/zeppelin/layer-zeppelin/README.md b/bigtop-packages/src/charm/zeppelin/layer-zeppelin/README.md index a303bc2d..1003bfe4 100644 --- a/bigtop-packages/src/charm/zeppelin/layer-zeppelin/README.md +++ b/bigtop-packages/src/charm/zeppelin/layer-zeppelin/README.md @@ -32,28 +32,59 @@ This charm deploys version 0.7.0 of the Zeppelin component from This charm requires Juju 2.0 or greater. If Juju is not yet set up, please follow the [getting-started][] instructions prior to deploying this charm. -This charm is intended to be deployed via one of the [apache bigtop bundles][]. +Zeppelin can be deployed by itself as a stand-alone web notebook. Deployment +is simple: + + juju deploy zeppelin + +To access the web interface, find the `Public address` of the `zeppelin` +application and expose it: + + juju status zeppelin + juju expose zeppelin + +The web interface will be available at the following URL: + + http://ZEPPELIN_PUBLIC_IP:9080 + +This charm also supports more complex integration scenarios as described below. + +## Hadoop Integration +This charm may be deployed alongside any of the [Apache Bigtop bundles][]. For example: juju deploy hadoop-processing -This will deploy an Apache Bigtop Hadoop cluster. More information about this +This will deploy a basic Bigtop Hadoop cluster. More information about this deployment can be found in the [bundle readme](https://jujucharms.com/hadoop-processing/). -Now add Zeppelin and relate it to the cluster via the hadoop-plugin: +Now relate the previously deployed `zeppelin` charm to the Hadoop plugin. This +enables communication between Zeppelin and Hadoop: - juju deploy zeppelin juju add-relation zeppelin plugin -To access the web console, find the `PUBLIC-ADDRESS` of the -zeppelin application and expose it: +Once deployment is complete, Zeppelin notebooks will have access to the +Hadoop Distributed File System (HDFS). Additionally, the local Spark driver +will be reconfigured in YARN mode. Any notebooks that submit Spark jobs will +leverage the Hadoop compute resources deployed by the `hadoop-processing` +bundle. - juju status zeppelin - juju expose zeppelin +## Spark Integration +Zeppelin includes a local Spark driver by default. This allows notebooks to +use a SparkContext without needing external Spark resources. This driver can +process jobs using local machine resources or compute resources from a Hadoop +cluster as mentioned above. -The web interface will be available at the following URL: +Zeppelin's Spark driver can also use external Spark cluster resources. For +example, the following will deploy a 3-unit Spark cluster that Zeppelin will +use when submitting jobs: - http://ZEPPELIN_PUBLIC_IP:9080 + juju deploy spark -n 3 + juju relate zeppelin spark + +Once deployment is complete, the local Spark driver will be reconfigured to +use the external cluster as the Spark Master. Any notebooks that submit Spark +jobs will leverage the newly deployed `spark` units. ## Network-Restricted Environments Charms can be deployed in environments with limited network access. To deploy @@ -99,6 +130,13 @@ more information about a specific smoke test with: juju show-action-output <action-id> +# Limitations + +When related to Spark, Zeppelin requires a `spark://xxx.xxx.xxx.xxx:7077` +URL for the Spark Master. This is only available when the `spark` charm is +in `standalone` mode -- `local` and `yarn` modes are not supported. + + # Issues Apache Bigtop tracks issues using JIRA (Apache account required). File an diff --git a/bigtop-packages/src/charm/zeppelin/layer-zeppelin/actions/smoke-test b/bigtop-packages/src/charm/zeppelin/layer-zeppelin/actions/smoke-test index 7305822b..e2e59598 100755 --- a/bigtop-packages/src/charm/zeppelin/layer-zeppelin/actions/smoke-test +++ b/bigtop-packages/src/charm/zeppelin/layer-zeppelin/actions/smoke-test @@ -77,7 +77,7 @@ try: errmsg = para['result']['msg'].splitlines()[0] else: errmsg = 'Unable to find error message' - hookenv.action_set('paragraph', resp.text) + hookenv.action_set({'paragraph': resp.text}) fail('Notebook failed: {}'.format(errmsg)) except requests.exceptions.RequestException as e: fail('Request failed: {}: {}'.format(e.request.url, e)) diff --git a/bigtop-packages/src/charm/zeppelin/layer-zeppelin/layer.yaml b/bigtop-packages/src/charm/zeppelin/layer-zeppelin/layer.yaml index 19626b4b..39a7ba32 100644 --- a/bigtop-packages/src/charm/zeppelin/layer-zeppelin/layer.yaml +++ b/bigtop-packages/src/charm/zeppelin/layer-zeppelin/layer.yaml @@ -26,13 +26,9 @@ options: zeppelin_notebooks: path: '/var/lib/zeppelin/notebook' ports: - # Ports that need to be exposed, overridden, or manually specified. - # Only expose ports serving a UI or external API (i.e., namenode and - # resourcemanager). Communication among units within the cluster does - # not need ports to be explicitly opened. zeppelin: port: 9080 exposed_on: 'zeppelin' - zeppelin_web: + zeppelin_websocket: port: 9081 exposed_on: 'zeppelin' diff --git a/bigtop-packages/src/charm/zeppelin/layer-zeppelin/lib/charms/layer/bigtop_zeppelin.py b/bigtop-packages/src/charm/zeppelin/layer-zeppelin/lib/charms/layer/bigtop_zeppelin.py index 0bb545a7..44512d29 100644 --- a/bigtop-packages/src/charm/zeppelin/layer-zeppelin/lib/charms/layer/bigtop_zeppelin.py +++ b/bigtop-packages/src/charm/zeppelin/layer-zeppelin/lib/charms/layer/bigtop_zeppelin.py @@ -56,11 +56,22 @@ class Zeppelin(object): self._add_override('zeppelin::server::server_port', self.dist_config.port('zeppelin')) self._add_override('zeppelin::server::web_socket_port', - self.dist_config.port('zeppelin_web')) + self.dist_config.port('zeppelin_websocket')) # Default spark to local mode on initial install. This will be # reconfigured if/when hadoop or spark relations are made. - self._add_override('zeppelin::server::spark_master_url', 'local[*]') + local_master = 'local[*]' + self._add_override('zeppelin::server::spark_master_url', local_master) + + # The spark-client role expects hdfs by default. Since we want to + # keep Hadoop optional, ensure we remove hadoopy bits from our + # local spark config. This has no effect if/when a remote spark joins, + # and since there is no spark history server running, the event dirs + # are not important -- they just need not be 'hdfs:///blah'. + events_log_dir = 'file:///tmp' + self._add_override('spark::common::master_url', local_master) + self._add_override('spark::common::event_log_dir', events_log_dir) + self._add_override('spark::common::history_log_dir', events_log_dir) ########## # BUG: BIGTOP-2742 @@ -116,18 +127,6 @@ class Zeppelin(object): bigtop.trigger_puppet() self.wait_for_api(30) - def setup_etc_env(self): - ''' - Write some niceties to /etc/environment - ''' - # Configure system-wide bits - zeppelin_bin = self.dist_config.path('zeppelin') / 'bin' - zeppelin_conf = self.dist_config.path('zeppelin_conf') - with utils.environment_edit_in_place('/etc/environment') as env: - if zeppelin_bin not in env['PATH']: - env['PATH'] = ':'.join([env['PATH'], zeppelin_bin]) - env['ZEPPELIN_CONF_DIR'] = zeppelin_conf - def reconfigure_zeppelin(self): ''' Configure zeppelin based on current environment diff --git a/bigtop-packages/src/charm/zeppelin/layer-zeppelin/reactive/zeppelin.py b/bigtop-packages/src/charm/zeppelin/layer-zeppelin/reactive/zeppelin.py index a04a59be..7c9ca06e 100644 --- a/bigtop-packages/src/charm/zeppelin/layer-zeppelin/reactive/zeppelin.py +++ b/bigtop-packages/src/charm/zeppelin/layer-zeppelin/reactive/zeppelin.py @@ -27,40 +27,46 @@ from charms.reactive.helpers import data_changed def update_status(): hadoop_joined = is_state('hadoop.joined') hadoop_ready = is_state('hadoop.ready') - hive_joined = is_state('hive.connected') - hive_ready = is_state('hive.available') + hive_joined = is_state('hive.joined') + hive_ready = is_state('hive.ready') spark_joined = is_state('spark.joined') spark_ready = is_state('spark.ready') + spark_blocked = is_state('spark.master.unusable') - waiting_apps = [] - ready_apps = [] - # Check status of the hadoop plugin - if hadoop_joined and not hadoop_ready: - waiting_apps.append('hadoop') - elif hadoop_ready: - ready_apps.append('hadoop') - - # Check status of Hive - if hive_joined and not hive_ready: - waiting_apps.append('hive') - elif hive_ready: - ready_apps.append('hive') - - # Check status of Spark - if spark_joined and not spark_ready: - waiting_apps.append('spark') - elif spark_ready: - ready_apps.append('spark') - - # Set appropriate status based on the apps we checked above - if waiting_apps: - hookenv.status_set('waiting', - 'waiting for: {}'.format(' & '.join(waiting_apps))) - elif ready_apps: - hookenv.status_set('active', - 'ready with: {}'.format(' & '.join(ready_apps))) + # handle blockers first; then report what's ready/waiting + if spark_blocked: + hookenv.status_set('blocked', + 'remote spark must be in standalone mode') else: - hookenv.status_set('active', 'ready') + waiting_apps = [] + ready_apps = [] + # Check status of the hadoop plugin + if hadoop_joined and not hadoop_ready: + waiting_apps.append('hadoop') + elif hadoop_ready: + ready_apps.append('hadoop') + + # Check status of Hive + if hive_joined and not hive_ready: + waiting_apps.append('hive') + elif hive_ready: + ready_apps.append('hive') + + # Check status of Spark + if spark_joined and not spark_ready: + waiting_apps.append('spark') + elif spark_ready: + ready_apps.append('spark') + + # Set appropriate status based on the apps we checked above + if waiting_apps: + hookenv.status_set('waiting', + 'waiting for: {}'.format(' & '.join(waiting_apps))) + elif ready_apps: + hookenv.status_set('active', + 'ready with: {}'.format(' & '.join(ready_apps))) + else: + hookenv.status_set('active', 'ready') @when('bigtop.available') @@ -69,7 +75,6 @@ def initial_setup(): hookenv.status_set('maintenance', 'installing zeppelin') zeppelin = Zeppelin() zeppelin.install() - zeppelin.setup_etc_env() zeppelin.open_ports() set_state('zeppelin.installed') update_status() @@ -78,8 +83,7 @@ def initial_setup(): hookenv.application_version_set(zeppelin_version) -@when('zeppelin.installed') -@when('hadoop.ready') +@when('zeppelin.installed', 'hadoop.ready') @when_not('zeppelin.hadoop.configured') def configure_hadoop(hadoop): zeppelin = Zeppelin() @@ -121,24 +125,47 @@ def unconfigure_hive(): @when('zeppelin.installed', 'spark.ready') def configure_spark(spark): + ''' + Configure Zeppelin to use remote Spark resources. + ''' + # NB: Use the master_url string if it already starts with spark://. + # Otherwise, it means the remote spark is in local or yarn mode -- that's + # bad because using 'local' or 'yarn' here would cause zepp's spark-submit + # to use the builtin spark, hence ignoring the remote spark. In this case, + # set a state so we can inform the user that the remote spark is unusable. master_url = spark.get_master_url() - if data_changed('spark.master', master_url): - hookenv.status_set('maintenance', 'configuring spark') - zeppelin = Zeppelin() - zeppelin.configure_spark(master_url) - set_state('zeppelin.spark.configured') - update_status() + + if master_url.startswith('spark'): + remove_state('spark.master.unusable') + # Only (re)configure if our master url has changed. + if data_changed('spark.master', master_url): + hookenv.status_set('maintenance', 'configuring spark') + zeppelin = Zeppelin() + zeppelin.configure_spark(master_url) + set_state('zeppelin.spark.configured') + else: + remove_state('zeppelin.spark.configured') + set_state('spark.master.unusable') + update_status() @when('zeppelin.installed', 'zeppelin.spark.configured') @when_not('spark.ready') def unconfigure_spark(): + ''' + Remove remote Spark; reconfigure Zeppelin to use embedded Spark. + ''' hookenv.status_set('maintenance', 'removing spark relation') zeppelin = Zeppelin() - # Yarn / Hadoop may not actually be available, but that is the default - # value and nothing else would reasonably work here either without Spark. - zeppelin.configure_spark('yarn-client') - data_changed('spark.master', 'yarn-client') # ensure updated if re-added + + # Zepp includes the spark-client role, so reconfigure our built-in spark + # if our related spark has gone away. + if is_state('zeppelin.hadoop.configured'): + local_master = 'yarn-client' + else: + local_master = 'local[*]' + zeppelin.configure_spark(local_master) + data_changed('spark.master', local_master) # ensure updated if re-added remove_state('zeppelin.spark.configured') update_status() diff --git a/bigtop-packages/src/charm/zeppelin/layer-zeppelin/tests/02-zeppelin-smoke.py b/bigtop-packages/src/charm/zeppelin/layer-zeppelin/tests/02-zeppelin-smoke.py index cac98c51..0887cbef 100755 --- a/bigtop-packages/src/charm/zeppelin/layer-zeppelin/tests/02-zeppelin-smoke.py +++ b/bigtop-packages/src/charm/zeppelin/layer-zeppelin/tests/02-zeppelin-smoke.py @@ -28,12 +28,9 @@ class TestDeploy(unittest.TestCase): def setUpClass(cls): cls.d = amulet.Deployment(series='xenial') cls.d.add('zeppelin') - cls.d.add('spark') - - cls.d.relate('zeppelin:spark', 'spark:client') cls.d.setup(timeout=1800) - cls.d.sentry.wait_for_messages({'zeppelin': re.compile('ready with')}, timeout=1800) + cls.d.sentry.wait_for_messages({'zeppelin': re.compile('ready')}, timeout=1800) cls.zeppelin = cls.d.sentry['zeppelin'][0] def test_zeppelin(self): diff --git a/bigtop-packages/src/charm/zeppelin/layer-zeppelin/tests/03-zeppelin-spark-smoke.py b/bigtop-packages/src/charm/zeppelin/layer-zeppelin/tests/03-zeppelin-spark-smoke.py new file mode 100755 index 00000000..daddc51b --- /dev/null +++ b/bigtop-packages/src/charm/zeppelin/layer-zeppelin/tests/03-zeppelin-spark-smoke.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import amulet +import re +import unittest + + +class TestDeploy(unittest.TestCase): + """ + Smoke test for Apache Bigtop Zeppelin using remote Spark resources. + """ + @classmethod + def setUpClass(cls): + cls.d = amulet.Deployment(series='xenial') + cls.d.add('zeppelin') + cls.d.add('spark') + + cls.d.relate('zeppelin:spark', 'spark:client') + + cls.d.setup(timeout=1800) + cls.d.sentry.wait_for_messages({'zeppelin': re.compile('ready with')}, timeout=1800) + cls.zeppelin = cls.d.sentry['zeppelin'][0] + + def test_zeppelin(self): + """ + Validate Zeppelin by running the smoke-test action. + """ + uuid = self.zeppelin.run_action('smoke-test') + result = self.d.action_fetch(uuid, full_output=True) + # action status=completed on success + if (result['status'] != "completed"): + self.fail('Zeppelin smoke-test failed: %s' % result) + + +if __name__ == '__main__': + unittest.main() diff --git a/bigtop-packages/src/charm/zeppelin/layer-zeppelin/wheelhouse.txt b/bigtop-packages/src/charm/zeppelin/layer-zeppelin/wheelhouse.txt index d555101f..7369cb8e 100644 --- a/bigtop-packages/src/charm/zeppelin/layer-zeppelin/wheelhouse.txt +++ b/bigtop-packages/src/charm/zeppelin/layer-zeppelin/wheelhouse.txt @@ -1 +1 @@ -requests>=2.0.0,<3.0.0 +requests==2.14.2 |