#!/bin/bash
#########################################################################
#
#  Processes the linaro web logs and produces usage reports.
#
#  Reports that can be run:
#    1) Awffull   (webalizer fork)
#    2) Webalizer
#    3) Webdruid  (webalizer fork)
#    4) Visitors
#
#  Each tool can be run on the full log or on a processed log.
#  The full log shows the path travelled and other info; the processed
#  log really only shows which files were downloaded.  For releases and
#  snapshots.linaro.org that is the important information.
#
#    1) full log:     analysis of the unaltered web log
#    2) filtered log: analysis of only the .gz and .bz2 downloads,
#                     with everything else stripped out
#
#  To speed things up dnshistory is used for the reverse DNS lookups.
#  As a time cheat the lookup is only run on the www.linaro.org log, so
#  always process that log first to keep the DNS data fresh.
#
#########################################################################

# Remember the directory we were started from.
STARTING_LOCATION=$(pwd)

# Housekeeping: the calling script has to provide the site name and the
# TRUE/FALSE constants, otherwise refuse to run.
if [[ -z "$WEB_NAME" || -z "$TRUE" ]] ; then
    echo "WEB SITE NAME or other variables NOT SET"
    echo "This script is not designed to be called directly"
    exit 1
fi

#TRUE=1
#FALSE=0
#WEB_NAME="snapshots.linaro.org"
#WEB_NAME="releases.linaro.org"
#WEB_NAME="www.linaro.org"

# Which tools are we running?  Any tool flag that is unset (or empty)
# is switched off.
: "${AWFFULL:=$FALSE}"
: "${WEBALIZER:=$FALSE}"
: "${WEBDRUID:=$FALSE}"
: "${VISITORS:=$FALSE}"

# An external script may set DEBUG; when it has not, default to false so
# the script runs quietly.
: "${DEBUG:=$FALSE}"

# Should the file-download information be extracted and the log analysers
# be run on just that data as well?  Quite handy for snapshots and releases
# .linaro.org so we get a better picture of what is downloaded.
# The filtered (download-only) and toolchain reports are produced only for
# the download sites listed here.
# 1 = true, 0 = false
case "$WEB_NAME" in
    snapshots.linaro.org|releases.linaro.org|builds.96boards.org)
        EXTRACT_GZ_BZ2_FILES=$TRUE
        EXTRACT_TOOLCHAIN_LOG=$TRUE
        ;;
    *)
        EXTRACT_GZ_BZ2_FILES=$FALSE
        EXTRACT_TOOLCHAIN_LOG=$FALSE
        ;;
esac

# This allows an external script to set DO_GEOIP_LOOKUP or DO_REV_DNS_LOOKUP
# as desired, but by default they are set to true so the log files have
# as much data in them as possible.
if [ -z "$DO_GEOIP_LOOKUP" ] ; then
    DO_GEOIP_LOOKUP=$TRUE
fi
if [ -z "$DO_REV_DNS_LOOKUP" ] ; then
    DO_REV_DNS_LOOKUP=$TRUE
fi

# Load config: the first command line argument names the config file,
# falling back to "config".  Quoted so a path with spaces is sourced as
# one file.
CONFIG=${1:-config}
source "$CONFIG"

#if [ $WEB_NAME = "snapshots.linaro.org" ] || [ $WEB_NAME = "releases.linaro.org" ] ; then
#    RAW_LOG_NAME="$WEB_NAME-$PROCESSED_LOG_NAME"
#elif [ $WEB_NAME = "www.linaro.org" ] ; then
#    RAW_LOG_NAME="$PROCESSED_LOG_NAME"
#fi

#########################################################################
#
#  Only have old logs on www.linaro.org as it's hard to get stuff done
#  on that machine.
#
#########################################################################
if [ "$WEB_NAME" = "www.linaro.org" ] ; then
    OLD_LOG_PATH="$INPUT_PATH/2013"
    OLD_LOG_NAME="$PROCESSED_LOG_NAME"
else
    OLD_LOG_PATH="$INPUT_PATH"
    OLD_LOG_NAME=""
fi

# Per-tool naming.  *_PATH_NAME is the report directory below $OUTPUT_PATH,
# *_CONF_*_NAME is the generated config file below $WORK_PATH; there is one
# pair for each of the full, filtered and toolchain logs.
AWFFULL_FULL_PATH_NAME="awffull.full"
AWFFULL_CONF_FULL_NAME="awffull.conf.full"
AWFFULL_FILTERED_PATH_NAME="awffull.filtered"
AWFFULL_CONF_FILTERED_NAME="awffull.conf.filtered"
AWFFULL_TOOLCHAIN_PATH_NAME="awffull.toolchain"
AWFFULL_CONF_TOOLCHAIN_NAME="awffull.conf.toolchain"
AWFFULL_EXTRA_CMD_LINE_OPTIONS=""

WEBALIZER_FULL_PATH_NAME="webalizer.full"
WEBALIZER_CONF_FULL_NAME="webalizer.conf.full"
WEBALIZER_FILTERED_PATH_NAME="webalizer.filtered"
WEBALIZER_CONF_FILTERED_NAME="webalizer.conf.filtered"
WEBALIZER_TOOLCHAIN_PATH_NAME="webalizer.toolchain"
WEBALIZER_CONF_TOOLCHAIN_NAME="webalizer.conf.toolchain"
WEBALIZER_EXTRA_CMD_LINE_OPTIONS=""

WEBDRUID_FULL_PATH_NAME="webdruid.full"
WEBDRUID_CONF_FULL_NAME="webdruid.conf.full"
WEBDRUID_FILTERED_PATH_NAME="webdruid.filtered"
WEBDRUID_CONF_FILTERED_NAME="webdruid.conf.filtered"
WEBDRUID_TOOLCHAIN_PATH_NAME="webdruid.toolchain"
WEBDRUID_CONF_TOOLCHAIN_NAME="webdruid.conf.toolchain"
WEBDRUID_EXTRA_CMD_LINE_OPTIONS=""

#########################################################################
#  Small helpers shared by the functions below
#########################################################################

# Echo the arguments on stdout, but only when DEBUG is switched on.
debug_echo () {
    if [ "$DEBUG" -eq "$TRUE" ] ; then
        echo "$@"
    fi
}

# Create directory $1 (including parents) unless it already exists.
ensure_dir () {
    if [ ! -d "$1" ] ; then
        mkdir -p "$1"
        debug_echo "mkdir -p $1"
    fi
}

# Delete file $1 from $WORK_PATH; a missing file is not an error (rm -f).
remove_work_file () {
    rm -f "$WORK_PATH/$1"
    debug_echo "rm -f $WORK_PATH/$1"
}

#########################################################################
#  Config files
#########################################################################

# Write the configuration file for one analyser run.
#   $1 = file to write the configuration into (overwritten)
#   $2 = output directory the analysis is to be written into
#   $3 = info written into the web analysis to identify what you are
#        looking at
write.conf.file () {
    local conf_file=$1 output_dir=$2 report_label=$3

    {
        echo "# DO NOT EDIT THIS FILE, modify analyse-snapshot.sh as it rewrites this file everytime it's run"
        echo "OutputDir $output_dir"
        echo "HTMLPost $report_label"
        echo "ReportTitle \"Usage Statistics for the $report_label of \""
        echo "HostName $WEB_NAME"
        echo "ReallyQuiet yes"
        echo "TopSites 100"
        echo "AllSites yes"
        echo "TopURLs 100"
        echo "AllURLs yes"
        echo "GeoIP no"
        # echo "GeoIPDatabase $GEO_IP_DB"
        echo "IgnoreURL /get-remote-static"
        echo "IgnoreURL /linaro-openid/login"
        echo "IgnoreURL /get-textile-files"
        echo "IgnoreURL /css/*"
        echo "IgnoreURL /static/*"
        echo "IgnoreURL /js/*"
        echo "IgnoreURL /license"
        # Two separate tests instead of the deprecated, ambiguous "-a".
        if [ "$WEB_NAME" = "cards.linaro.org" ] \
            && [ "$conf_file" = "$WORK_PATH/$WEBDRUID_CONF_FULL_NAME" ] ; then
            # Graphs take too long to generate, being killed if tried
            echo "PathGraph no"
            echo "UsersFlow no"
        fi
    } > "$conf_file"
}

# Write the full, and when enabled the filtered and toolchain, config
# files of one analyser.
#   $1 = analyser name as it appears in the debug messages
#   $2 $3 = config file name / report directory name for the full log
#   $4 $5 = the same for the filtered log
#   $6 $7 = the same for the toolchain log
write_tool_configs () {
    local tool=$1

    write.conf.file "$WORK_PATH/$2" "$OUTPUT_PATH/$3" "$PROCESSED_LOG_NAME"
    debug_echo "write.$tool.conf.full"
    if [ "$EXTRACT_GZ_BZ2_FILES" -eq "$TRUE" ] ; then
        write.conf.file "$WORK_PATH/$4" "$OUTPUT_PATH/$5" "$FILTERED_LOG_NAME"
        debug_echo "write.$tool.conf.filtered"
        if [ "$EXTRACT_TOOLCHAIN_LOG" -eq "$TRUE" ] ; then
            write.conf.file "$WORK_PATH/$6" "$OUTPUT_PATH/$7" "$TOOLCHAIN_LOG_NAME"
            debug_echo "write.$tool.conf.toolchain"
        fi
    fi
}

# Write the config files of every enabled webalizer-family analyser.
write_config_files () {
    if [ "$AWFFULL" -eq "$TRUE" ] ; then
        write_tool_configs awffull \
            "$AWFFULL_CONF_FULL_NAME" "$AWFFULL_FULL_PATH_NAME" \
            "$AWFFULL_CONF_FILTERED_NAME" "$AWFFULL_FILTERED_PATH_NAME" \
            "$AWFFULL_CONF_TOOLCHAIN_NAME" "$AWFFULL_TOOLCHAIN_PATH_NAME"
    fi
    if [ "$WEBALIZER" -eq "$TRUE" ] ; then
        write_tool_configs webalizer \
            "$WEBALIZER_CONF_FULL_NAME" "$WEBALIZER_FULL_PATH_NAME" \
            "$WEBALIZER_CONF_FILTERED_NAME" "$WEBALIZER_FILTERED_PATH_NAME" \
            "$WEBALIZER_CONF_TOOLCHAIN_NAME" "$WEBALIZER_TOOLCHAIN_PATH_NAME"
    fi
    if [ "$WEBDRUID" -eq "$TRUE" ] ; then
        write_tool_configs webdruid \
            "$WEBDRUID_CONF_FULL_NAME" "$WEBDRUID_FULL_PATH_NAME" \
            "$WEBDRUID_CONF_FILTERED_NAME" "$WEBDRUID_FILTERED_PATH_NAME" \
            "$WEBDRUID_CONF_TOOLCHAIN_NAME" "$WEBDRUID_TOOLCHAIN_PATH_NAME"
    fi
}

#########################################################################
#  Directories
#########################################################################

# Make sure the report directories of one analyser exist.
#   $1 / $2 / $3 = directory name (below $OUTPUT_PATH) for the full /
#                  filtered / toolchain report
prep_tool_directories () {
    ensure_dir "$OUTPUT_PATH/$1"
    if [ "$EXTRACT_GZ_BZ2_FILES" -eq "$TRUE" ] ; then
        ensure_dir "$OUTPUT_PATH/$2"
        if [ "$EXTRACT_TOOLCHAIN_LOG" -eq "$TRUE" ] ; then
            ensure_dir "$OUTPUT_PATH/$3"
        fi
    fi
}

# Verify, and create where necessary, every directory this script needs,
# then change into $WORK_PATH.
prep_directories () {
    ensure_dir "$WORK_PATH"

    if [ "$AWFFULL" -eq "$TRUE" ] ; then
        prep_tool_directories "$AWFFULL_FULL_PATH_NAME" \
            "$AWFFULL_FILTERED_PATH_NAME" "$AWFFULL_TOOLCHAIN_PATH_NAME"
    fi
    if [ "$WEBALIZER" -eq "$TRUE" ] ; then
        # The debug message here used to reference the undefined
        # WEBALIZER_FILTER_PATH_NAME; ensure_dir prints the real path.
        prep_tool_directories "$WEBALIZER_FULL_PATH_NAME" \
            "$WEBALIZER_FILTERED_PATH_NAME" "$WEBALIZER_TOOLCHAIN_PATH_NAME"
    fi
    if [ "$WEBDRUID" -eq "$TRUE" ] ; then
        prep_tool_directories "$WEBDRUID_FULL_PATH_NAME" \
            "$WEBDRUID_FILTERED_PATH_NAME" "$WEBDRUID_TOOLCHAIN_PATH_NAME"
    fi

    cd "$WORK_PATH"
}

#########################################################################
#  Cleanup
#########################################################################

# Remove the generated config files of one analyser from $WORK_PATH.
#   $1 / $2 / $3 = config file name for the full / filtered / toolchain log
cleanup_tool_configs () {
    remove_work_file "$1"
    if [ "$EXTRACT_GZ_BZ2_FILES" -eq "$TRUE" ] ; then
        remove_work_file "$2"
        if [ "$EXTRACT_TOOLCHAIN_LOG" -eq "$TRUE" ] ; then
            remove_work_file "$3"
        fi
    fi
}

# Delete the temporary logs and generated config files from $WORK_PATH.
cleanup () {
    # now delete the temp log files.
    remove_work_file "$TMP_LOG_NAME"
    remove_work_file "$PROCESSED_LOG_NAME"
    if [ "$EXTRACT_GZ_BZ2_FILES" -eq "$TRUE" ] ; then
        remove_work_file "$FILTERED_LOG_NAME"
        if [ "$EXTRACT_TOOLCHAIN_LOG" -eq "$TRUE" ] ; then
            remove_work_file "$TOOLCHAIN_LOG_NAME"
        fi
    fi

    if [ "$AWFFULL" -eq "$TRUE" ] ; then
        cleanup_tool_configs "$AWFFULL_CONF_FULL_NAME" \
            "$AWFFULL_CONF_FILTERED_NAME" "$AWFFULL_CONF_TOOLCHAIN_NAME"
    fi
    if [ "$WEBALIZER" -eq "$TRUE" ] ; then
        # Handled exactly like the other analysers now.  Previously the
        # toolchain config was only removed when the filtered config file
        # happened to exist.
        cleanup_tool_configs "$WEBALIZER_CONF_FULL_NAME" \
            "$WEBALIZER_CONF_FILTERED_NAME" "$WEBALIZER_CONF_TOOLCHAIN_NAME"
    fi
    if [ "$WEBDRUID" -eq "$TRUE" ] ; then
        cleanup_tool_configs "$WEBDRUID_CONF_FULL_NAME" \
            "$WEBDRUID_CONF_FILTERED_NAME" "$WEBDRUID_CONF_TOOLCHAIN_NAME"
    fi
}

#########################################################################
#  Log extraction
#########################################################################

# Build the logs the analysers work on:
#   $WORK_PATH/$PROCESSED_LOG_NAME  preprocessed old logs + current logs
#   $WORK_PATH/$FILTERED_LOG_NAME   only the compressed-file downloads
#   $WORK_PATH/$TOOLCHAIN_LOG_NAME  only the gcc-linaro downloads
# The last two are only produced when the matching EXTRACT_* flag is set.
extract_logs () {
    local raw_logs

    # Build a single log file that is not gzipped.
    # Now in 2014 we can just preprocess all 2012 and 2013 files and save
    # processing time for all 3 web servers, then just grab all of the 2014
    # files to process
    #     *access.log-2014*
    #     preprocessed-*-2013-access.log.gz
    #
    # $RAW_LOG_NAME is left unquoted on purpose so that it is expanded as a
    # file name pattern (presumably a glob set in the config - TODO confirm).
    # The previous test "[ x > 0 ]" was a redirection, not a comparison: it
    # was always true and left a stray file called "0" behind.
    raw_logs=( "$INPUT_PATH"/$RAW_LOG_NAME )
    if [ -e "${raw_logs[0]}" ] ; then
        debug_echo "$WEB_NAME making access.log by zcat $INPUT_PATH/$RAW_LOG_NAME"
        zcat "${raw_logs[@]}" | grep -v "::1" > "$WORK_PATH/$TMP_LOG_NAME"
    else
        # No current logs: hand an empty log to the steps below, which is
        # what the always-true test used to produce via a failing zcat.
        : > "$WORK_PATH/$TMP_LOG_NAME"
    fi

    # Previous years' logs are preprocessed into a single compressed file to
    # save processing time.  Best effort: a missing archive is not fatal and
    # its error message is only shown when debugging.
    if [ "$DEBUG" -eq "$TRUE" ] ; then
        zcat "$INPUT_PATH"/preprocessed*access.log.gz > "$WORK_PATH/$PROCESSED_LOG_NAME" || true
    else
        zcat "$INPUT_PATH"/preprocessed*access.log.gz > "$WORK_PATH/$PROCESSED_LOG_NAME" 2>/dev/null || true
    fi

    if [ "$DO_REV_DNS_LOOKUP" -eq "$TRUE" ] || [ "$DO_GEOIP_LOOKUP" -eq "$TRUE" ] ; then
        # If it's www.linaro.org build the DNS database.
        # This is a tad risky as we could have different folks coming
        # directly into releases or snapshots than the main site; that said
        # the risk is low and the speedup huge so it's worth it.
        if [ "$WEB_NAME" = "www.linaro.org" ] ; then
            debug_echo "About to do dnshistory lookup"
            # $DNSHISTORY_OPTS stays unquoted: it may hold several options.
            if [ "$DEBUG" -eq "$TRUE" ] ; then
                /usr/bin/dnshistory -L $DNSHISTORY_OPTS -d "$DNSHISTORY_DB" -f "$WORK_PATH/$TMP_LOG_NAME"
            else
                /usr/bin/dnshistory -L $DNSHISTORY_OPTS -d "$DNSHISTORY_DB" -f "$WORK_PATH/$TMP_LOG_NAME" > /dev/null
            fi
        fi

        # Now translate ip addresses to DNS names for all log files
        if [ "$DO_GEOIP_LOOKUP" -eq "$TRUE" ] ; then
            # If GEOIP lookup is desired do both GEOIP and reverse DNS
            # lookup at the same time; the iploc.py program was modified to
            # read both databases and do both in one pass.
            debug_echo "About to do GEOIP LOOKUP and dnshistory replace"
            python "$STARTING_LOCATION/iploc.py" --config="$STARTING_LOCATION/$CONFIG" \
                "$WORK_PATH/$TMP_LOG_NAME" >> "$WORK_PATH/$PROCESSED_LOG_NAME"
        else
            # GEOIP info not requested so do the reverse DNS only
            debug_echo "About to do dnshistory replace only"
            /usr/bin/dnshistory -T --logtype=www -d "$DNSHISTORY_DB" -f "$WORK_PATH/$TMP_LOG_NAME" \
                >> "$WORK_PATH/$PROCESSED_LOG_NAME"
        fi
    else
        debug_echo "No GEOIP LOOKUP or Reverse DNS"
        cat "$WORK_PATH/$TMP_LOG_NAME" >> "$WORK_PATH/$PROCESSED_LOG_NAME"
    fi

    # Now make a new file with only the .gz, .bz2, .xz, .exe and .zip files
    # downloaded.  This grep can take some time to run; it uses a regular
    # expression to extract the compressed files.
    if [ "$EXTRACT_GZ_BZ2_FILES" -eq "$TRUE" ] ; then
        grep -E '\<*\.(bz2|gz|xz|exe|zip)\>' "$WORK_PATH/$PROCESSED_LOG_NAME" \
            | grep -v "gcc-linaro\ " \
            > "$WORK_PATH/$TMP_LOG_NAME"
        debug_echo "creating filtered log"

        # Strip out some standard extra junk we don't need or care about.
        # NOTE(review): the patterns are regular expressions, so the
        # unescaped dots match any character, not only a literal ".".
        grep -v -e .asc -e HEAD -e OPTIONS -e .png -e .ico -e .css -e .js \
            -e validation.linaro.org \
            "$WORK_PATH/$TMP_LOG_NAME" > "$WORK_PATH/$FILTERED_LOG_NAME"

        if [ "$EXTRACT_TOOLCHAIN_LOG" -eq "$TRUE" ] ; then
            debug_echo "creating toochain log"
            grep -E '\<*gcc-linaro' "$WORK_PATH/$FILTERED_LOG_NAME" > "$WORK_PATH/$TOOLCHAIN_LOG_NAME"
        fi
    fi
}

#########################################################################
#  Log analysis
#########################################################################

# Run one webalizer-family analyser over the full log and, when enabled,
# over the filtered and toolchain logs.  The tool's stdout is discarded.
#   $1 = analyser command (also used in the debug messages)
#   $2 = extra command line options (word-split on purpose)
#   $3 / $4 / $5 = config file name for the full / filtered / toolchain log
run_analyser () {
    local tool=$1 extra_opts=$2

    # use all the data in the file
    "$tool" $extra_opts -c "$WORK_PATH/$3" "$WORK_PATH/$PROCESSED_LOG_NAME" > /dev/null
    debug_echo "$tool processing $WORK_PATH/$PROCESSED_LOG_NAME"
    if [ "$EXTRACT_GZ_BZ2_FILES" -eq "$TRUE" ] ; then
        # Now use the filtered log that has only the .bz2 or .gz files and
        # look at those patterns
        "$tool" $extra_opts -c "$WORK_PATH/$4" "$WORK_PATH/$FILTERED_LOG_NAME" > /dev/null
        debug_echo "$tool processing $WORK_PATH/$FILTERED_LOG_NAME"
        if [ "$EXTRACT_TOOLCHAIN_LOG" -eq "$TRUE" ] ; then
            "$tool" $extra_opts -c "$WORK_PATH/$5" "$WORK_PATH/$TOOLCHAIN_LOG_NAME" > /dev/null
            debug_echo "$tool processing $WORK_PATH/$TOOLCHAIN_LOG_NAME"
        fi
    fi
}

# Run visitors on log $2 (below $WORK_PATH), writing the HTML report $1
# (below $OUTPUT_PATH).  All output of the tool is discarded.
run_visitors () {
    visitors --ignore-404 -A --prefix "$WEB_NAME" -o html --trails \
        --output-file "$OUTPUT_PATH/$1" "$WORK_PATH/$2" &> /dev/null
}

# Copy log $1 from $WORK_PATH into $OUTPUT_PATH and gzip the copy,
# overwriting an earlier .gz.
publish_compressed_log () {
    cp "$WORK_PATH/$1" "$OUTPUT_PATH/."
    gzip -f -9 "$OUTPUT_PATH/$1"
}

# Run every enabled analyser; a timestamp is printed before each one when
# debugging.
process_logs () {
    ## Awffull
    debug_echo "$(date)"
    if [ "$AWFFULL" -eq "$TRUE" ] ; then
        run_analyser awffull "$AWFFULL_EXTRA_CMD_LINE_OPTIONS" \
            "$AWFFULL_CONF_FULL_NAME" "$AWFFULL_CONF_FILTERED_NAME" \
            "$AWFFULL_CONF_TOOLCHAIN_NAME"
    fi

    ## Webalizer
    debug_echo "$(date)"
    if [ "$WEBALIZER" -eq "$TRUE" ] ; then
        run_analyser webalizer "$WEBALIZER_EXTRA_CMD_LINE_OPTIONS" \
            "$WEBALIZER_CONF_FULL_NAME" "$WEBALIZER_CONF_FILTERED_NAME" \
            "$WEBALIZER_CONF_TOOLCHAIN_NAME"
    fi

    ## Visitors
    debug_echo "$(date)"
    if [ "$VISITORS" -eq "$TRUE" ] ; then
        # use all the data in the file
        debug_echo "visitors processing $WORK_PATH/$PROCESSED_LOG_NAME"
        debug_echo "visitors --ignore-404 -A --prefix $WEB_NAME -o html --trails --output-file $OUTPUT_PATH/visitors-all.html $WORK_PATH/$PROCESSED_LOG_NAME"
        run_visitors visitors-all.html "$PROCESSED_LOG_NAME"
        if [ "$EXTRACT_GZ_BZ2_FILES" -eq "$TRUE" ] ; then
            # Now use the filtered log that has only the .bz2 or .gz files
            # and look at those patterns
            run_visitors visitors-filtered.html "$FILTERED_LOG_NAME"
            publish_compressed_log "$FILTERED_LOG_NAME"
            debug_echo "visitors processing $WORK_PATH/$FILTERED_LOG_NAME and then creating $OUTPUT_PATH/$FILTERED_LOG_NAME.gz"
            if [ "$EXTRACT_TOOLCHAIN_LOG" -eq "$TRUE" ] ; then
                run_visitors visitors-toolchain.html "$TOOLCHAIN_LOG_NAME"
                publish_compressed_log "$TOOLCHAIN_LOG_NAME"
                debug_echo "visitors processing $WORK_PATH/$TOOLCHAIN_LOG_NAME and then creating $OUTPUT_PATH/$TOOLCHAIN_LOG_NAME.gz"
            fi
        fi
    fi

    ## Webdruid
    debug_echo "$(date)"
    if [ "$WEBDRUID" -eq "$TRUE" ] ; then
        run_analyser webdruid "$WEBDRUID_EXTRA_CMD_LINE_OPTIONS" \
            "$WEBDRUID_CONF_FULL_NAME" "$WEBDRUID_CONF_FILTERED_NAME" \
            "$WEBDRUID_CONF_TOOLCHAIN_NAME"
    fi
}

#########################################################################
#  OK, the actual section to do work: prep, write out config files as
#  needed, extract log files, process the log files, cleanup and exit.
#########################################################################

# verify and make if necessary all directories needed by this script
prep_directories

# start from a clean work directory
cleanup

# write out the config files, remember they are changed here in this file
write_config_files

#/usr/bin/touch ~/bin/starttime
# combine all logs into a single file, then filter the log into a second file
extract_logs

# analyse the logs...
process_logs

# cleanup the extra files and stuff (kept around when debugging)
if [ "$DEBUG" -ne "$TRUE" ] ; then
    cleanup
else
    echo "WARNING: Not cleaning up temporary files, beware of running out of disk space."
fi

# change back to where we were called from
cd "$STARTING_LOCATION"

# done, out of here