#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script polls the specified url (typically a service we want to see
# running) and a process.
# If the web request fails, it kills the process being monitored and exits.
# If the process is not alive any more, it exits.
# Typically used in startup scripts for services such as solr that should be
# terminated if the server is not running.
# Example usage in a shell script:
#   bigtop-monitor-service 30 $$ http://127.0.0.1:8983/solr

#######################################
# Print an informational message on stdout, prefixed with "INFO:".
# Arguments: the words of the message
#######################################
function info() {
  echo "INFO:" "$@"
}

#######################################
# Watchdog loop: every <interval> seconds request <url> and check <pid>.
# Arguments:
#   $1 - polling interval in seconds (integer >= 1); also used as the
#        curl timeout for each heartbeat request
#   $2 - id of the process to watch and, on failure, to kill (integer >= 1)
#   $3 - url to send heartbeat requests to
# Outputs:
#   progress messages on stdout, usage/validation errors on stderr
# Exits (never returns):
#   1 - bad arguments, or the watched process is no longer alive
#   0 - the process was killed after a 5xx/000 answer, or a 4xx answer
#       other than 401 was received
#######################################
function monitor() {
  local usage="$0 polling_interval_seconds process_id_to_kill url_to_monitor"
  if [ $# -ne 3 ]; then
    printf '%s\n' "$usage" >&2
    exit 1
  fi

  local interval="$1"
  local pid="$2"
  local url="$3"
  local http_code

  # Validate the pid before anything below can hand it to kill: an empty
  # value makes kill fail, and 0 or a negative number addresses a whole
  # process group (or every process) instead of a single process.
  if ! [[ "$pid" =~ ^0*[1-9][0-9]*$ ]]; then
    echo "Invalid value for process_id_to_kill $pid - must be a positive integer" >&2
    exit 1
  fi

  # A bad interval still takes the watched process down, as the watchdog
  # cannot do its job without a usable polling period.
  if ! [[ "$interval" =~ ^[0-9]+$ ]]; then
    echo "Invalid value for polling_interval_seconds $interval - must be a positive integer" >&2
    kill -9 "$pid"
    exit 1
  fi

  # [ ] compares in decimal, so zero-padded values such as 08 are accepted.
  if [ "$interval" -le 0 ]; then
    echo "Invalid value for polling_interval_seconds $interval - must be >= 1" >&2
    kill -9 "$pid"
    exit 1
  fi

  # Close file descriptors 3 through 255 in this shell. eval is required
  # because the brace expansion has to happen before the redirections are
  # parsed. NOTE(review): presumably done so the watchdog does not keep
  # descriptors inherited from its launcher open - confirm.
  eval exec {3..255}\>\&-
  cd /

  info "Starting a watchdog process monitoring process '$pid' and url '$url'"
  while :; do
    sleep "$interval"
    info "Sending a heartbeat request to $url"
    http_code=$(curl -m"$interval" --retry 5 -L -k -s --negotiate -u : \
      -o /dev/null -w "%{http_code}" "$url")
    # No output at all from curl is treated like a server-side failure.
    http_code=${http_code:-600}

    # If we're getting 5xx+ (server side error) kill the service and exit.
    # curl prints 000 as the HTTP code when it could not get a response,
    # so a code of 0 is treated as a failure as well.
    if [ "$http_code" -ge 500 ] || [ "$http_code" -eq 0 ]; then
      info "Got $http_code HTTP code from the server. Watchdog is now killing process: $pid"
      kill -9 "$pid"
      exit 0
    fi

    # If we're getting 4xx (client side error) we better exit silently.
    # 401 (Unauthorized) is a special case of when we should keep running.
    if [ "$http_code" -ge 400 ] && [ "$http_code" -lt 500 ] \
      && [ "$http_code" -ne 401 ]; then
      info "Got $http_code HTTP code. This is confusing. Watchdog is now exiting..."
      exit 0
    fi

    # kill -0 sends no signal; it only reports whether the pid can be
    # signalled, which is used here as the liveness check.
    if kill -0 "$pid" >/dev/null 2>&1; then
      echo "Process $pid is alive"
    else
      echo "Process $pid is dead"
      exit 1
    fi
  done
}

# Run the watchdog in the background so the calling script can carry on.
monitor "$@" &