diff options
author | Rajendra Gokhale <rvg@cloudera.com> | 2014-10-15 10:34:41 -0700 |
---|---|---|
committer | Roman Shaposhnik <rvs@apache.org> | 2014-10-19 12:54:13 -0700 |
commit | b444de910d5745d632aee24b81253b893756b07c (patch) | |
tree | bb158a7305df2a98617743771e336c8034d9274a | |
parent | a991f5a1ecebd0d6e2602dccae5df459a6f0240d (diff) |
BIGTOP-1489. Changes to pull tomcat_watchdog code out of solr initialization scripts into bigtop-utils
4 files changed, 100 insertions, 43 deletions
diff --git a/bigtop-packages/src/common/bigtop-utils/bigtop-monitor-service b/bigtop-packages/src/common/bigtop-utils/bigtop-monitor-service new file mode 100755 index 00000000..776179de --- /dev/null +++ b/bigtop-packages/src/common/bigtop-utils/bigtop-monitor-service @@ -0,0 +1,93 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +#This script polls the specified url (typically a service we want to see running) and process +#If it finds that the web request fails it also kills the process being monitored and exits +#If it finds that the process is not alive any more we exit +#Typically used in startup scripts for services such as solr that should be terminated if the +#server is not running +#Example usage in a shell script : bigtop-monitor-service $$ http://127.0.0.1:8983/solr + +function info() { + echo "INFO:" "$@" +} + +function monitor() { +USAGE="$0 polling_interval_seconds process_id_to_kill url_to_monitor " +if [ $# -ne 3 ] +then + echo $USAGE >&2 + exit 1 +fi +interval="$1" +pid="$2" +url="$3" + +if ! 
expr "$interval" : '^[0-9][0-9]*$' >/dev/null +then + echo "Invalid value for polling_interval_seconds $interval - must be a positive integer" >&2 + kill -9 $pid + exit 1 +fi + +if [ $interval -le 0 ] +then + echo "Invalid value for polling_interval_seconds $interval - must be >= 1" >&2 + kill -9 $pid + exit 1 +fi + +eval exec {3..255}\>\&- +cd / + +info "Starting a watchdog process monitoring process '$pid' and url '$url'" + +while : +do + sleep $interval + info "Sending a heartbeat request to $url" + + HTTP_CODE=`curl -m$interval --retry 5 -L -k -s --negotiate -u : -o /dev/null -w "%{http_code}" "$url"` + HTTP_CODE=${HTTP_CODE:-600} + + # If we're getting 5xx+ (server side error) kill the service and exit + # Because curl is weird (it tries to proxy HTTP exit codes to be its + # UNIX exit codes times 10 AND at the same time prints 000 as HTTP exit + # code) we should also treat exit code of 0 as a failure. + if [ $HTTP_CODE -ge 500 -o $HTTP_CODE -eq 0 ] ; then + info "Got $HTTP_CODE HTTP code from the server. Watchdog is now killing process: $pid" + kill -9 $pid + exit 0 + fi + + # If we're getting 4xx (client side error) we better exit silently + # 401 (Unauthorized) is a special case of when we should keep running + if [ $HTTP_CODE -ge 400 -a $HTTP_CODE -lt 500 -a $HTTP_CODE -ne 401 ] ; then + info "Got $HTTP_CODE HTTP code. This is confusing. Watchdog is now exiting..." 
+ exit 0 + fi + + if kill -0 $pid >>/dev/null 2>&1 ;then + echo "Process $pid is alive" + else + echo "Process $pid is dead" + exit 1 + fi +done +} + +monitor "$@" & diff --git a/bigtop-packages/src/common/solr/install_solr.sh b/bigtop-packages/src/common/solr/install_solr.sh index 52a65b5e..e5b90494 100644 --- a/bigtop-packages/src/common/solr/install_solr.sh +++ b/bigtop-packages/src/common/solr/install_solr.sh @@ -168,48 +168,6 @@ cat > $PREFIX/$LIB_DIR/bin/solrd <<'EOF' BIGTOP_DEFAULTS_DIR=${BIGTOP_DEFAULTS_DIR-/etc/default} [ -n "${BIGTOP_DEFAULTS_DIR}" -a -r ${BIGTOP_DEFAULTS_DIR}/solr ] && . ${BIGTOP_DEFAULTS_DIR}/solr -function info() { - echo "INFO:" "$@" -} - -function tomcat_watchdog() { - local LOCAL_SOLR_URL="http://127.0.0.1:$SOLR_PORT/solr" - - eval exec {3..255}\>\&- - cd / - info "Starting a watchdog process monitoring $$" - while true ; do - sleep $SOLRD_WATCHDOG_TIMEOUT - info "Sending a heartbeat request to $LOCAL_SOLR_URL" - - HTTP_CODE=`curl -m$SOLRD_WATCHDOG_TIMEOUT --retry 5 -L -k -s --negotiate -u : -o /dev/null -w "%{http_code}" "$LOCAL_SOLR_URL"` - HTTP_CODE=${HTTP_CODE:-600} - - # If we're getting 5xx+ (server side error) kill the service and exit - # Because curl is weird (it tries to proxy HTTP exit codes to be its - # UNIX exit codes times 10 AND at the same time prints 000 as HTTP exit - # code) we should also treat exit code of 0 as a failure. - if [ $HTTP_CODE -ge 500 -o $HTTP_CODE -eq 0 ] ; then - info "Got $HTTP_CODE HTTP code from the Solr server. Watchdog is now killing it: $$" - kill -9 $$ - exit 0 - fi - - # If we're getting 4xx (client side error) we better exit silently - # 401 (Unauthorized) is a special case of when we should keep running - if [ $HTTP_CODE -ge 400 -a $HTTP_CODE -lt 500 -a $HTTP_CODE -ne 401 ] ; then - info "Got $HTTP_CODE HTTP code. This is confusing. Watchdog is now exiting..." - exit 0 - fi - - # Finally check that the monitored process is still running (a bit of belt'n'suspenders) - if ! 
kill -0 $$ ; then - info "Looks like the Solr server exited. Watchdog is now exiting..." - exit 0 - fi - done -} - # Autodetect JAVA_HOME if not defined . /usr/lib/bigtop-utils/bigtop-detect-javahome @@ -314,8 +272,10 @@ export CATALINA_OPTS="${CATALINA_OPTS} -Dsolr.host=$HOSTNAME # and thus doesn't know the admin port export JAVA_OPTS="$CATALINA_OPTS" +if [ -x /usr/lib/bigtop-utils/bigtop-monitor-service ]; then if ([ "$1" = "start" -o "$1" = "run" ]) && [ -n "$SOLRD_WATCHDOG_TIMEOUT" ] ; then - tomcat_watchdog & + /usr/lib/bigtop-utils/bigtop-monitor-service $SOLRD_WATCHDOG_TIMEOUT $$ http://127.0.0.1:8983/solr +fi fi exec ${CATALINA_HOME}/bin/catalina.sh "$@" diff --git a/bigtop-packages/src/deb/bigtop-utils/rules b/bigtop-packages/src/deb/bigtop-utils/rules index fb940baf..e55e985f 100644 --- a/bigtop-packages/src/deb/bigtop-utils/rules +++ b/bigtop-packages/src/deb/bigtop-utils/rules @@ -35,5 +35,6 @@ override_dh_auto_install: install -p -m 755 debian/bigtop-detect-javahome debian/bigtop-utils/usr/lib/bigtop-utils/ install -p -m 755 debian/bigtop-detect-javalibs debian/bigtop-utils/usr/lib/bigtop-utils/ install -p -m 755 debian/bigtop-detect-classpath debian/bigtop-utils/usr/lib/bigtop-utils/ + install -p -m 755 debian/bigtop-monitor-service debian/bigtop-utils/usr/lib/bigtop-utils/ install -d -p -m 755 debian/bigtop-utils/etc/default install -p -m 644 debian/bigtop-utils.default debian/bigtop-utils/etc/default/bigtop-utils diff --git a/bigtop-packages/src/rpm/bigtop-utils/SPECS/bigtop-utils.spec b/bigtop-packages/src/rpm/bigtop-utils/SPECS/bigtop-utils.spec index 4148b30f..97604343 100644 --- a/bigtop-packages/src/rpm/bigtop-utils/SPECS/bigtop-utils.spec +++ b/bigtop-packages/src/rpm/bigtop-utils/SPECS/bigtop-utils.spec @@ -31,6 +31,7 @@ Source1: LICENSE Source2: bigtop-utils.default Source3: bigtop-detect-javalibs Source4: bigtop-detect-classpath +Source5: bigtop-monitor-service Requires: bash @@ -53,6 +54,7 @@ install -p -m 644 %{SOURCE1} . 
install -p -m 644 %{SOURCE2} . install -p -m 644 %{SOURCE3} . install -p -m 644 %{SOURCE4} . +install -p -m 644 %{SOURCE5} . %build @@ -64,6 +66,7 @@ install -d -p -m 755 $RPM_BUILD_ROOT/etc/default install -p -m 755 %{SOURCE0} $RPM_BUILD_ROOT%{lib_dir}/ install -p -m 755 %{SOURCE3} $RPM_BUILD_ROOT%{lib_dir}/ install -p -m 755 %{SOURCE4} $RPM_BUILD_ROOT%{lib_dir}/ +install -p -m 755 %{SOURCE5} $RPM_BUILD_ROOT%{lib_dir}/ install -p -m 644 %{SOURCE2} $RPM_BUILD_ROOT/etc/default/bigtop-utils %clean |