#!/bin/bash
#########################################################################
#                                                                       #
# This script processes Linaro web logs, producing different reports    #
# Reports that can be run:                                              #
# 1) Awffull  (webalizer fork)                                          #
# 2) Webalizer                                                          #
# 3) Webdruid (webalizer fork)                                          #
# 4) Visitors                                                           #
#                                                                       #
# You can run those tools on the full log or a processed log            #
# The full log shows path traveled and other info, the processed log    #
# really only shows what files were downloaded, for releases and        #
# snapshots.linaro.org this is important information.                   #
#                                                                       #
# 1) full log analysis of an unaltered web log                          #
# 2) filtered analysis of the web log of only the .gz and .bz2 files    #
#    and everything else stripped out                                   #
#                                                                       #
# To speed things up we use dnshistory to do reverse DNS                #
# as a time cheat we only run the lookup on www.linaro.org log so       #
# you should always process that log first so your DNS data is fresh    #
#                                                                       #
#########################################################################

# Remember the directory we were started from; it is used later to locate
# iploc.py and the config file, and we cd back to it when we are done.
STARTING_LOCATION=$(pwd)

# Housekeeping: WEB_NAME and TRUE have to be provided by whoever invokes this
# script. Refuse to run without them.
if [[ -z "$WEB_NAME" || -z "$TRUE" ]]; then
    printf '%s\n' \
        "WEB SITE NAME or other variables NOT SET" \
        "This script is not designed to be called directly"
    exit 1
fi

#TRUE=1
#FALSE=0

#WEB_NAME="snapshots.linaro.org"
#WEB_NAME="releases.linaro.org"
#WEB_NAME="www.linaro.org"

# Which tools are we running? Any selector that is unset (or empty) when we
# get here defaults to $FALSE, i.e. that tool is skipped.
: "${AWFFULL:=$FALSE}"
: "${WEBALIZER:=$FALSE}"
: "${WEBDRUID:=$FALSE}"
: "${VISITORS:=$FALSE}"

# An external script may set DEBUG; when it has not, default to $FALSE so the
# script runs quietly.
: "${DEBUG:=$FALSE}"

# Do we want to extract file info and run the log analyzers on only that data?
# Quite handy for snapshots and releases .linaro.org so we get a better
# picture of what is downloaded.
# 1 = true, 0 = false
# WEB_NAME is matched quoted so it is never word-split or glob-expanded.
case "$WEB_NAME" in
    snapshots.linaro.org|releases.linaro.org|builds.96boards.org)
        EXTRACT_GZ_BZ2_FILES=$TRUE
        EXTRACT_TOOLCHAIN_LOG=$TRUE
        ;;
    *)
        EXTRACT_GZ_BZ2_FILES=$FALSE
        EXTRACT_TOOLCHAIN_LOG=$FALSE
        ;;
esac

# An external script may set DO_GEOIP_LOOKUP or DO_REV_DNS_LOOKUP as desired;
# by default both are true so the log files carry as much data as possible.
: "${DO_GEOIP_LOOKUP:=$TRUE}"
: "${DO_REV_DNS_LOOKUP:=$TRUE}"

# Load config: the first command-line argument names the file to source
# (default "config"). Quoted so a path containing spaces or glob characters is
# sourced as one file. The same name is later handed to iploc.py relative to
# $STARTING_LOCATION.
# NOTE(review): presumably this file supplies INPUT_PATH, WORK_PATH,
# OUTPUT_PATH and the *_LOG_NAME variables used below -- confirm.
CONFIG=${1:-config}
source "$CONFIG"

#if [ $WEB_NAME = "snapshots.linaro.org" ] || [ $WEB_NAME = "releases.linaro.org" ] ; then
#    RAW_LOG_NAME="$WEB_NAME-$PROCESSED_LOG_NAME"
#elif [ $WEB_NAME = "www.linaro.org" ] ; then
#    RAW_LOG_NAME="$PROCESSED_LOG_NAME"
#fi

#########################################################################
#                                                                       #
# only have old logs on www.linaro.org as it's hard to get stuff done   #
# on that machine                                                       #
#                                                                       #
#########################################################################
# WEB_NAME is quoted so the comparison cannot be broken by word splitting.
# NOTE(review): OLD_LOG_PATH / OLD_LOG_NAME are not referenced anywhere else
# in this file -- confirm whether another script still relies on them.
if [ "$WEB_NAME" = "www.linaro.org" ] ; then
    OLD_LOG_PATH="$INPUT_PATH/2013"
    OLD_LOG_NAME="$PROCESSED_LOG_NAME"
else
    OLD_LOG_PATH="$INPUT_PATH"
    OLD_LOG_NAME=""
fi

# Per-tool names. For each analyzer and each log variant (full, filtered,
# toolchain):
#   *_PATH_NAME   report directory, created under $OUTPUT_PATH
#   *_CONF_*_NAME generated configuration file, written under $WORK_PATH
# *_EXTRA_CMD_LINE_OPTIONS is inserted into the tool's command line as-is.

# --- awffull ---
AWFFULL_FULL_PATH_NAME='awffull.full'
AWFFULL_FILTERED_PATH_NAME='awffull.filtered'
AWFFULL_TOOLCHAIN_PATH_NAME='awffull.toolchain'
AWFFULL_CONF_FULL_NAME='awffull.conf.full'
AWFFULL_CONF_FILTERED_NAME='awffull.conf.filtered'
AWFFULL_CONF_TOOLCHAIN_NAME='awffull.conf.toolchain'
AWFFULL_EXTRA_CMD_LINE_OPTIONS=''

# --- webalizer ---
WEBALIZER_FULL_PATH_NAME='webalizer.full'
WEBALIZER_FILTERED_PATH_NAME='webalizer.filtered'
WEBALIZER_TOOLCHAIN_PATH_NAME='webalizer.toolchain'
WEBALIZER_CONF_FULL_NAME='webalizer.conf.full'
WEBALIZER_CONF_FILTERED_NAME='webalizer.conf.filtered'
WEBALIZER_CONF_TOOLCHAIN_NAME='webalizer.conf.toolchain'
WEBALIZER_EXTRA_CMD_LINE_OPTIONS=''

# --- webdruid ---
WEBDRUID_FULL_PATH_NAME='webdruid.full'
WEBDRUID_FILTERED_PATH_NAME='webdruid.filtered'
WEBDRUID_TOOLCHAIN_PATH_NAME='webdruid.toolchain'
WEBDRUID_CONF_FULL_NAME='webdruid.conf.full'
WEBDRUID_CONF_FILTERED_NAME='webdruid.conf.filtered'
WEBDRUID_CONF_TOOLCHAIN_NAME='webdruid.conf.toolchain'
WEBDRUID_EXTRA_CMD_LINE_OPTIONS=''

write.conf.file ()
{
# Write (overwrite) the configuration file for one analyzer report.
#
# $1 = File to write the conf into
# $2 = Output directory for the analysis to be written into
# $3 = Info written into the web analysis to identify what you are looking at
#
# Globals read: WEB_NAME, WORK_PATH, WEBDRUID_CONF_FULL_NAME
# Returns non-zero when the conf file cannot be opened for writing.
    local conf_file=$1
    local output_dir=$2
    local report_id=$3

    # One quoted redirection for the whole group: the file is opened once and
    # a conf path containing spaces no longer causes "ambiguous redirect".
    {
        echo "# DO NOT EDIT THIS FILE, modify analyse-snapshot.sh as it rewrites this file everytime it's run"
        echo "OutputDir $output_dir"
        echo "HTMLPost $report_id"
        echo "ReportTitle \"Usage Statistics for the $report_id of \""
        echo "HostName $WEB_NAME"
        echo "ReallyQuiet yes"
        echo "TopSites 100"
        echo "AllSites yes"
        echo "TopURLs 100"
        echo "AllURLs yes"
        echo "GeoIP no"
#        echo "GeoIPDatabase   $GEO_IP_DB"
        echo "IgnoreURL /get-remote-static"
        echo "IgnoreURL /linaro-openid/login"
        echo "IgnoreURL /get-textile-files"
        echo "IgnoreURL /css/*"
        echo "IgnoreURL /static/*"
        echo "IgnoreURL /js/*"
        echo "IgnoreURL /license"

        if [[ "$WEB_NAME" == "cards.linaro.org" && "$conf_file" == "$WORK_PATH/$WEBDRUID_CONF_FULL_NAME" ]]; then
            # Graphs take too long to generate, being killed if tried
            echo "PathGraph no"
            echo "UsersFlow no"
        fi
    } > "$conf_file"
}

_write_tool_configs ()
{
    # Write the conf files of one analyzer: always the "full" one, the
    # "filtered" one when EXTRACT_GZ_BZ2_FILES is true, and the "toolchain"
    # one when EXTRACT_TOOLCHAIN_LOG is true as well.
    #
    # $1 = upper-case prefix of the tool's *_CONF_*_NAME / *_PATH_NAME variables
    # $2 = tool name as printed in the debug messages
    local prefix=$1
    local label=$2
    local variant tag log_name conf_var dir_var

    for variant in FULL FILTERED TOOLCHAIN ; do
        case "$variant" in
            FULL)
                tag=full
                log_name=$PROCESSED_LOG_NAME
                ;;
            FILTERED)
                [ "$EXTRACT_GZ_BZ2_FILES" -eq "$TRUE" ] || break
                tag=filtered
                log_name=$FILTERED_LOG_NAME
                ;;
            TOOLCHAIN)
                [ "$EXTRACT_TOOLCHAIN_LOG" -eq "$TRUE" ] || break
                tag=toolchain
                log_name=$TOOLCHAIN_LOG_NAME
                ;;
        esac

        # Look the names up indirectly, e.g. AWFFULL_CONF_FULL_NAME.
        conf_var="${prefix}_CONF_${variant}_NAME"
        dir_var="${prefix}_${variant}_PATH_NAME"

        # Every argument is quoted so paths with spaces stay one argument.
        write.conf.file "$WORK_PATH/${!conf_var}" "$OUTPUT_PATH/${!dir_var}" "$log_name"
        if [ "$DEBUG" -eq "$TRUE" ] ; then
            echo "write.$label.conf.$tag"
        fi
    done
}

write_config_files ()
{
    # Generate the configuration files for every enabled webalizer-style
    # analyzer (awffull, webalizer, webdruid) via write.conf.file.
    # Globals read: AWFFULL, WEBALIZER, WEBDRUID, TRUE, DEBUG, WORK_PATH,
    # OUTPUT_PATH, EXTRACT_GZ_BZ2_FILES, EXTRACT_TOOLCHAIN_LOG, the *_LOG_NAME
    # variables and the per-tool *_CONF_*_NAME / *_PATH_NAME names.
    if [ "$AWFFULL" -eq "$TRUE" ] ; then
        _write_tool_configs AWFFULL awffull
    fi

    if [ "$WEBALIZER" -eq "$TRUE" ] ; then
        _write_tool_configs WEBALIZER webalizer
    fi

    if [ "$WEBDRUID" -eq "$TRUE" ] ; then
        _write_tool_configs WEBDRUID webdruid
    fi
}

_ensure_dir ()
{
    # Create directory $1 (including parents) unless it already exists.
    # With DEBUG on, echo the mkdir command that was run.
    if [ ! -d "$1" ]; then
        mkdir -p "$1"
        if [ "$DEBUG" -eq "$TRUE" ] ; then
            echo "mkdir -p $1"
        fi
    fi
}

prep_directories ()
{
    # Make sure the work directory and the report directories of every enabled
    # analyzer exist, then change into the work directory.
    #
    # For each of awffull / webalizer / webdruid that is enabled: the "full"
    # directory is always created, the "filtered" one when EXTRACT_GZ_BZ2_FILES
    # is true, and the "toolchain" one when EXTRACT_TOOLCHAIN_LOG is true too.
    # Going through one helper also fixes the webalizer debug message, which
    # used an undefined variable and printed a truncated path.
    #
    # Returns the exit status of the final cd.
    local tool dir_var

    _ensure_dir "$WORK_PATH"

    for tool in AWFFULL WEBALIZER WEBDRUID ; do
        # ${!tool} is the value of the selector variable named by $tool.
        [ "${!tool}" -eq "$TRUE" ] || continue

        dir_var="${tool}_FULL_PATH_NAME"
        _ensure_dir "$OUTPUT_PATH/${!dir_var}"

        [ "$EXTRACT_GZ_BZ2_FILES" -eq "$TRUE" ] || continue
        dir_var="${tool}_FILTERED_PATH_NAME"
        _ensure_dir "$OUTPUT_PATH/${!dir_var}"

        [ "$EXTRACT_TOOLCHAIN_LOG" -eq "$TRUE" ] || continue
        dir_var="${tool}_TOOLCHAIN_PATH_NAME"
        _ensure_dir "$OUTPUT_PATH/${!dir_var}"
    done

    cd "$WORK_PATH"
}

_remove_work_file ()
{
    # Delete the file named $1 inside $WORK_PATH (a missing file is not an
    # error). With DEBUG on, echo the rm command.
    rm -f "$WORK_PATH/$1"
    if [ "$DEBUG" -eq "$TRUE" ] ; then
        echo "rm -f $WORK_PATH/$1"
    fi
}

cleanup ()
{
    # Delete the temporary logs and the generated conf files from $WORK_PATH.
    #
    # Logs: the temp and processed logs always; the filtered log when
    # EXTRACT_GZ_BZ2_FILES is true; the toolchain log when
    # EXTRACT_TOOLCHAIN_LOG is true as well.
    # Conf files: the same full/filtered/toolchain rule for each enabled tool.
    # All three tools are treated alike: webalizer's toolchain conf used to
    # survive whenever its filtered conf was missing.
    local tool conf_var

    _remove_work_file "$TMP_LOG_NAME"
    _remove_work_file "$PROCESSED_LOG_NAME"

    if [ "$EXTRACT_GZ_BZ2_FILES" -eq "$TRUE" ] ; then
        _remove_work_file "$FILTERED_LOG_NAME"
        if [ "$EXTRACT_TOOLCHAIN_LOG" -eq "$TRUE" ] ; then
            _remove_work_file "$TOOLCHAIN_LOG_NAME"
        fi
    fi

    for tool in AWFFULL WEBALIZER WEBDRUID ; do
        # ${!tool} is the value of the selector variable named by $tool.
        [ "${!tool}" -eq "$TRUE" ] || continue

        conf_var="${tool}_CONF_FULL_NAME"
        _remove_work_file "${!conf_var}"

        [ "$EXTRACT_GZ_BZ2_FILES" -eq "$TRUE" ] || continue
        conf_var="${tool}_CONF_FILTERED_NAME"
        _remove_work_file "${!conf_var}"

        [ "$EXTRACT_TOOLCHAIN_LOG" -eq "$TRUE" ] || continue
        conf_var="${tool}_CONF_TOOLCHAIN_NAME"
        _remove_work_file "${!conf_var}"
    done
}

extract_logs ()
{
    # Build the uncompressed working logs under $WORK_PATH:
    #   $TMP_LOG_NAME        current raw logs, lines containing "::1" removed
    #   $PROCESSED_LOG_NAME  preprocessed older logs, followed by the raw log
    #                        (optionally run through GeoIP / reverse DNS)
    #   $FILTERED_LOG_NAME   only the compressed-file / exe downloads
    #   $TOOLCHAIN_LOG_NAME  the gcc-linaro subset of the filtered log
    # The last two are only produced when the EXTRACT_* flags are true.
    local tmp_log="$WORK_PATH/$TMP_LOG_NAME"
    local processed_log="$WORK_PATH/$PROCESSED_LOG_NAME"
    local filtered_log="$WORK_PATH/$FILTERED_LOG_NAME"
    local toolchain_log="$WORK_PATH/$TOOLCHAIN_LOG_NAME"
    local candidate
    local -a raw_logs=()

    # Now in 2014 we can just preprocess all 2012 and 2013 files and save processing time for all 3 web servers
    # then just grab all of the 2014 files to process
    # *access.log-2014*
    # preprocessed-*-2013-access.log.gz
    #
    # Collect the raw logs that really exist. $RAW_LOG_NAME stays unquoted on
    # purpose so the shell expands it as a glob. This replaces "[ x > 0 ]",
    # which was always true and created a file named "0".
    for candidate in "$INPUT_PATH"/$RAW_LOG_NAME ; do
        if [ -e "$candidate" ] ; then
            raw_logs+=( "$candidate" )
        fi
    done

    if [ "${#raw_logs[@]}" -gt 0 ] ; then
        if [ "$DEBUG" -eq "$TRUE" ] ; then
            echo "$WEB_NAME making access.log by zcat $INPUT_PATH/$RAW_LOG_NAME"
        fi
        # NOTE(review): "::1" matches anywhere in the line, so any line that
        # merely contains that text is dropped too -- confirm that is intended.
        zcat "${raw_logs[@]}" | grep -v "::1" > "$tmp_log"
    else
        # Nothing to unpack: still leave an empty temp log behind, because the
        # steps below read it unconditionally.
        echo "$WEB_NAME: no raw logs match $INPUT_PATH/$RAW_LOG_NAME" >&2
        : > "$tmp_log"
    fi

    # Previous years logs preprocessed into a single compressed file to save processing time.
    # A missing preprocessed file is tolerated; zcat's complaint is only shown in debug mode.
    if [ "$DEBUG" -eq "$TRUE" ] ; then
        zcat "$INPUT_PATH"/preprocessed*access.log.gz > "$processed_log" || true
    else
        zcat "$INPUT_PATH"/preprocessed*access.log.gz > "$processed_log" 2>/dev/null || true
    fi

    if [ "$DO_REV_DNS_LOOKUP" -eq "$TRUE" ] || [ "$DO_GEOIP_LOOKUP" -eq "$TRUE" ] ; then
        # If it's www.linaro.org build the DNS database
        # This is a tad risky as we could have different folks coming directly
        # into releases or snapshots than the main site, that said the risk is
        # low and the speedup huge so it's worth it.
        if [ "$WEB_NAME" = "www.linaro.org" ] ; then
            if [ "$DEBUG" -eq "$TRUE" ] ; then
                echo "About to do dnshistory lookup"
            fi
            # $DNSHISTORY_OPTS is deliberately unquoted: it may hold several options.
            if [ "$DEBUG" -eq "$TRUE" ] ; then
                /usr/bin/dnshistory -L $DNSHISTORY_OPTS -d "$DNSHISTORY_DB" -f "$tmp_log"
            else
                /usr/bin/dnshistory -L $DNSHISTORY_OPTS -d "$DNSHISTORY_DB" -f "$tmp_log" > /dev/null
            fi
        fi

        # Now translate ip addresses to DNS names for all log files
        if [ "$DO_GEOIP_LOOKUP" -eq "$TRUE" ] ; then
            # if GEOIP LOOKUP is desired do both GEOIP and reverse DNS lookup at the same time
            # the iploc.py program was modified to read both databases and do both in one pass.
            if [ "$DEBUG" -eq "$TRUE" ] ; then
                echo "About to do GEOIP LOOKUP and dnshistory replace"
            fi
            python "$STARTING_LOCATION/iploc.py" --config="$STARTING_LOCATION/$CONFIG" \
                "$tmp_log" >> "$processed_log"
        else
            # GEOIP info not requested so do the reverse DNS only
            if [ "$DEBUG" -eq "$TRUE" ] ; then
                echo "About to do dnshistory replace only"
            fi
            /usr/bin/dnshistory -T --logtype=www -d "$DNSHISTORY_DB" -f "$tmp_log" >> "$processed_log"
        fi
    else
        if [ "$DEBUG" -eq "$TRUE" ] ; then
            echo "No GEOIP LOOKUP or Reverse DNS"
        fi
        cat "$tmp_log" >> "$processed_log"
    fi

    # now make a new file with only .gz, bz2, xz, exe, and zip files downloaded
    # this grep can take some time to run, it's using a regular expression to extract compressed files
    if [ "$EXTRACT_GZ_BZ2_FILES" -eq "$TRUE" ] ; then
        # The temp log is reused here as the intermediate result.
        grep -E '\<*\.(bz2|gz|xz|exe|zip)\>' "$processed_log" | grep -v "gcc-linaro\ " > "$tmp_log"
        if [ "$DEBUG" -eq "$TRUE" ] ; then
            echo "creating filtered log"
        fi
        # strip out our known IP's and some standard extra junk we don't need or care about
        # One grep with several -e patterns drops the same lines as the former
        # chain of "grep -v" calls.
        # NOTE(review): the leading "." in .asc/.png/.ico/.css/.js is a regex
        # wildcard, not a literal dot, so these match more than file
        # extensions -- kept as-is to leave the reports unchanged; confirm.
        grep -v \
            -e '.asc' \
            -e 'HEAD' \
            -e 'OPTIONS' \
            -e '.png' \
            -e '.ico' \
            -e '.css' \
            -e '.js' \
            -e 'validation.linaro.org' \
            "$tmp_log" > "$filtered_log"
        if [ "$EXTRACT_TOOLCHAIN_LOG" -eq "$TRUE" ] ; then
            if [ "$DEBUG" -eq "$TRUE" ] ; then
                echo "creating toochain log"
            fi
            grep -E '\<*gcc-linaro' "$filtered_log" > "$toolchain_log"
        fi
    fi
}

_run_analyzer_reports ()
{
    # Run one webalizer-style analyzer over the processed log and, depending
    # on EXTRACT_GZ_BZ2_FILES / EXTRACT_TOOLCHAIN_LOG, the filtered and the
    # toolchain logs. The tool's stdout is discarded.
    #
    # $1 = command to run (awffull, webalizer or webdruid)
    # $2 = upper-case prefix of the tool's *_EXTRA_CMD_LINE_OPTIONS and
    #      *_CONF_*_NAME variables
    local tool_cmd=$1
    local prefix=$2
    local opts_var="${prefix}_EXTRA_CMD_LINE_OPTIONS"
    local variant log_name conf_var

    for variant in FULL FILTERED TOOLCHAIN ; do
        case "$variant" in
            FULL)
                # use all the data in the file
                log_name=$PROCESSED_LOG_NAME
                ;;
            FILTERED)
                # Now use the filtered log that has only the .bz2 or .gz files and look at those patterns
                [ "$EXTRACT_GZ_BZ2_FILES" -eq "$TRUE" ] || break
                log_name=$FILTERED_LOG_NAME
                ;;
            TOOLCHAIN)
                [ "$EXTRACT_TOOLCHAIN_LOG" -eq "$TRUE" ] || break
                log_name=$TOOLCHAIN_LOG_NAME
                ;;
        esac

        conf_var="${prefix}_CONF_${variant}_NAME"
        # The extra options are deliberately unquoted so they split into
        # separate words; the paths are quoted so they never do.
        "$tool_cmd" ${!opts_var} -c "$WORK_PATH/${!conf_var}" "$WORK_PATH/$log_name" > /dev/null
        if [ "$DEBUG" -eq "$TRUE" ] ; then
            echo "$tool_cmd processing $WORK_PATH/$log_name"
        fi
    done
}

_run_visitors ()
{
    # Produce one visitors HTML report, discarding the tool's own output.
    # $1 = report file name, created under $OUTPUT_PATH
    # $2 = log file name, read from $WORK_PATH
    visitors --ignore-404 -A --prefix "$WEB_NAME" -o html --trails \
        --output-file "$OUTPUT_PATH/$1" "$WORK_PATH/$2" &> /dev/null
}

_publish_log ()
{
    # Copy log $1 from $WORK_PATH into $OUTPUT_PATH and gzip the copy,
    # replacing any earlier .gz of the same name.
    cp "$WORK_PATH/$1" "$OUTPUT_PATH/."
    gzip -f -9 "$OUTPUT_PATH/$1"
}

process_logs ()
{
    # Run every enabled analyzer over the logs prepared by extract_logs.
    # Order: awffull, webalizer, visitors, webdruid. With DEBUG on, a
    # timestamp is printed before each tool's section.

    ## Awffull
    if [ "$DEBUG" -eq "$TRUE" ] ; then
        date
    fi
    if [ "$AWFFULL" -eq "$TRUE" ] ; then
        _run_analyzer_reports awffull AWFFULL
    fi

    ## Webalizer
    if [ "$DEBUG" -eq "$TRUE" ] ; then
        date
    fi
    if [ "$WEBALIZER" -eq "$TRUE" ] ; then
        _run_analyzer_reports webalizer WEBALIZER
    fi

    ## Visitors
    if [ "$DEBUG" -eq "$TRUE" ] ; then
        date
    fi
    if [ "$VISITORS" -eq "$TRUE" ] ; then
        # use all the data in the file
        if [ "$DEBUG" -eq "$TRUE" ] ; then
            echo "visitors processing $WORK_PATH/$PROCESSED_LOG_NAME"
            echo "visitors --ignore-404 -A --prefix $WEB_NAME -o html --trails --output-file $OUTPUT_PATH/visitors-all.html $WORK_PATH/$PROCESSED_LOG_NAME"
        fi
        _run_visitors visitors-all.html "$PROCESSED_LOG_NAME"

        if [ "$EXTRACT_GZ_BZ2_FILES" -eq "$TRUE" ] ; then
            # Now use the filtered log that has only the .bz2 or .gz files and look at those patterns
            _run_visitors visitors-filtered.html "$FILTERED_LOG_NAME"
            _publish_log "$FILTERED_LOG_NAME"
            if [ "$DEBUG" -eq "$TRUE" ] ; then
                echo "visitors processing $WORK_PATH/$FILTERED_LOG_NAME and then creating $OUTPUT_PATH/$FILTERED_LOG_NAME.gz"
            fi

            if [ "$EXTRACT_TOOLCHAIN_LOG" -eq "$TRUE" ] ; then
                _run_visitors visitors-toolchain.html "$TOOLCHAIN_LOG_NAME"
                _publish_log "$TOOLCHAIN_LOG_NAME"
                if [ "$DEBUG" -eq "$TRUE" ] ; then
                    echo "visitors processing $WORK_PATH/$TOOLCHAIN_LOG_NAME and then creating $OUTPUT_PATH/$TOOLCHAIN_LOG_NAME.gz"
                fi
            fi
        fi
    fi

    ## Webdruid
    if [ "$DEBUG" -eq "$TRUE" ] ; then
        date
    fi
    if [ "$WEBDRUID" -eq "$TRUE" ] ; then
        _run_analyzer_reports webdruid WEBDRUID
    fi

}

# OK, the actual section to do work: prep, write out config files as needed,
# extract log files, process the log files, cleanup and exit.

# verify and make if necessary all directories needed by this script
# (this also changes into the work directory), then clear out anything a
# previous run left behind
prep_directories
cleanup

# write out the config files, remember they are changed here in this file
write_config_files

#/usr/bin/touch ~/bin/starttime
# combine all logs into a single file, then filter the log into a second file
extract_logs

# analyse the logs...
process_logs

# cleanup the extra files and stuff; in debug mode they are kept for inspection
if [ "$DEBUG" -ne "$TRUE" ] ; then
    cleanup
else
    echo "WARNING: Not cleaning up temporary files, beware of running out of disk space."
fi

# change back to where we were called from (quoted: the path may contain spaces)
cd "$STARTING_LOCATION"

# done, out of here