Re: script for df-k filesystem alerts...

From: Damir Delija (damir.delija@PBZ.HR)
Date: Thu Sep 12 2002 - 08:31:43 EDT


Hi

This is a more complex one we've written to monitor our AIX boxes
(it is actually a rewrite of some other scripts).
It is maybe too complex, but it can be integrated into BB and it
is intelligent enough not to flood the mailbox.

There are some hints about such monitoring in the redbook "Managing AIX farms".

-------------------------------------------------------------------------
#!/usr/bin/ksh

#############################################################
#Pet 5 Apr 2002 14:33:46
#PBZ HR
#df monitoring script
#version 0.0.2-D
#
#Author: Damir Delija ddelija@srce.hr
#############################################################
# FILE NAME: dfk
# FILE DESCRIPTION: crontab tester df based on bb-scripts
#
# RETURN VALUE DESCRIPTION:
# 0 success
# 1 failure
#
# EXTERNAL PROCEDURES CALLED:
#
# OPERATIONS
#
# This is based on standard df-k monitors from various sources.
# The script is capable of generating notifications if an FS goes over the
# limit. At the moment only data blocks are monitored, not inodes.
# The script stores df -k output into a file and compares the result with the
# configuration file CONFFILE; if there is an event and it is the first such
# event, it is noticed and a message is sent (or logged or echoed, depending
# on configuration).
#
# Swap monitoring is also added (lsps line) but it can be turned off.
#
# CONTROL
# there is CONTROLFILE=/etc/dfmon which, if it exists, aborts execution
#
# there is CONFFILE=$CONFDIR/df_mon.conf which has limits for various FS
# format is:
# device<tab|space>Info<tab|space>Warning<tab|space>Fatal
#
# #FS Info Warning Fatal # in first column is comment
# DEF 10 10 10 DEF is default value for each FS
# / 88 92 50 specific value for part. FS
# /usr 95 97 99
# /var 70 80 90
#
# USAGE add into crontab and configure parameters
#
# QUIRKS
# lslp on AIX: strange message "rootvg locked", makes no sense at all
#
############################################################
#Revison history
#0.0.0-D just working with df
#0.0.1-D df and lsps, idea by Dean
#0.0.2-D added support for BB
#0.0.3-D moved to rootop
############################################################

# Fixed search path so the external commands resolve the same way under cron
# as in an interactive session.
export PATH=/bin:/usr/local/bin:/usr/bin:/etc:/usr/sbin:/usr/ucb:/sbin

##BASEDIR=/usr/sysadmin/scripts
BASEDIR=/tmp/rdf

# Configuration lives directly in BASEDIR, run-time data in its logs/ subdir.
CONFDIR=$BASEDIR
LOGDIR=$BASEDIR/logs

# Kill switch: when the control file exists the check is skipped and the
# script ends quietly with success. (The previous test was inverted and
# exited when the file was absent, contradicting the documented behavior.)
CONTROLFILE=/etc/dfmon
if test -f "$CONTROLFILE"
then
        exit 0
fi

# Non-empty enables the additional paging-space (lsps) check;
# set to "" to turn it off.
SWAPTEST="SWAP"

##########################################################
# test for, and create if missing, the directories
# necessary for script operation; give up with exit 1
# when one cannot be created
##########################################################
for d in "$CONFDIR" "$LOGDIR"
do
        if test ! -d "$d"
        then
                mkdir -p "$d" || exit 1
        fi
done

#############################################################
# work files
#############################################################
# Host name; used in the log file name and as the first line of every message.
HOST=$(hostname)

#############################################################
# configuration file with the per-FS limits;
# if it is missing the script is misconfigured - exit 1
#############################################################
CONFFILE=$CONFDIR/df_mon.conf
test -f "$CONFFILE" || exit 1

#############################################################
# logs and event data
# DF holds the most recent df -k snapshot
#############################################################
DF=$LOGDIR/df-k

#############################################################
# the event message of this run and the one of the previous run
#############################################################
EV=$LOGDIR/event.df-k
EVO=$LOGDIR/event.df-k.old

#############################################################
# Notification info
# (each channel below is used only when its variable is non-empty)
#############################################################
# log file the report is appended to
LOGFILE=$LOGDIR/$HOST.df_mon.log
# email address the report is sent to
DFADMIN=me@my.email
# syslog priority handed to logger -p
SYSLOG=""
# non-empty: also dump the report on stdout
ECHO=""
# snmptrap - reserved for future use
SNMPTRAP=""

# BigBrother: command used to deliver the status line, and its first argument
BB="echo"
BBDISP=" ..."

######################################################
# default limits: the first line whose first field is exactly DEF.
# A plain substring search for "DEF" would also pick up comment lines or
# other entries that merely contain those letters and could return several
# lines, so the field is compared exactly and only one line is kept.
######################################################
DATADEF=$(awk '$1 == "DEF" { print; exit }' "$CONFFILE")

######################################################
# message accumulator and global action flag:
# MESSAGE starts with the host name and gets one entry per FS over a limit;
# ACTION > 0 means a notification has to be produced
######################################################
MESSAGE="$HOST"
ACTION=0

# Real data collection: write a fresh snapshot into $DF, one line per file
# system in the form "<number> <name>". Column 4 is taken as the usage
# percentage (assumes the AIX "df -k" layout - confirm on other platforms);
# adding 0 makes awk convert the field to a number, which drops a trailing
# "%". The last column is the mount point.
# The ">" redirection already truncates the file, so no separate truncation
# step is needed. The explanatory note about /usr and /usr/local that used to
# sit here had been wrapped onto a line of its own and was therefore executed
# as a command; it is now part of this comment only.
df -k | awk 'NR>1{ print $4+0" "$NF }' > "$DF"

if test -n "$SWAPTEST"
then
        # added on Dean's suggestion: paging spaces are checked as well.
        # Column 5 of "lsps -a" is taken as the usage value and column 1 as
        # the paging space name.
        lsps -a | awk 'NR>1{ print $5+0" "$1 }' >> "$DF"
fi

################################################
# walk through the snapshot and test the limits
# FS loop
#
# Reads "<usage> <name>" lines from $DF. For every entry over one of its
# limits an item "\n <name> <level> <usage>" is appended to MESSAGE and
# ACTION is raised. The file is read through a redirection rather than a
# pipe so that the loop runs in the current shell in every shell flavour
# and MESSAGE/ACTION are still set after "done".
################################################

while read ACTUAL FS
do
        # skip commented lines in the snapshot
        case "$ACTUAL" in
        "#"*) continue ;;
        esac

        # defaults for control
        LEVEL=""
        TRIGGER=0

        # Limits for the current FS: the first configuration line whose first
        # field is exactly the FS name. An exact field comparison keeps
        # /local from picking up the entry of /usr/local and never matches
        # commented lines, whose first field starts with "#".
        DATA=$(awk -v fs="$FS" '$1 == fs { print; exit }' "$CONFFILE")

        # no line in conf for this FS: use the defaults
        test -z "$DATA" && DATA="$DATADEF"

        # error ... even the defaults are missing - go out!
        test -z "$DATA" && exit 1

        # split the line into fields: name, Info, Warning, Fatal.
        # "--" keeps a line starting with "-" from being taken as options.
        set -- $DATA
        INFO=$2; WARN=$3; FATAL=$4

        # A missing or non-numeric value would only make the comparisons
        # below fail with a shell error, so report it once and move on.
        case "$ACTUAL:$INFO:$WARN:$FATAL" in
        *[!0-9:]*|:*|*::*|*:)
                echo "dfk: non-numeric usage or limits for $FS, entry skipped" >&2
                continue
                ;;
        esac

        # find out the level of the message, most severe first

        if [ "$ACTUAL" -gt "$FATAL" ]
        then
                TRIGGER=1
                LEVEL="Fatal"

        elif [ "$ACTUAL" -gt "$WARN" ]
        then
                TRIGGER=1
                LEVEL="Warning"

        elif [ "$ACTUAL" -gt "$INFO" ]
        then
                TRIGGER=1
                LEVEL="Info"
        fi

        # trigger is up: add this FS to the message and request an action
        if [ "$TRIGGER" -gt 0 ]
        then
                ACTION=$TRIGGER
                MESSAGE=$MESSAGE"\n $FS $LEVEL $ACTUAL"
        fi

done < "$DF"

######################################################
# end of the FS loop
######################################################

######################################################
# keep two generations of the event message:
# $EV is the message of this run, $EVO the one before it
######################################################
if test -f "$EV"
then
        mv "$EV" "$EVO"
fi
# make sure the old event file exists even on the very first run
touch "$EVO"
echo "$MESSAGE" > "$EV"

# Same event as last time: leave quietly, nothing is sent, so that the
# recipients are not flooded with repeated reports.
if diff "$EV" "$EVO" > /dev/null 2>&1
then
        exit 0
fi

# Notification step: runs only when at least one entry went over a limit
# (ACTION > 0). An "if" is used because more cases may be added later.
if [ "$ACTION" -gt 0 ]
then

        ######################################################
        # notification
        # if there is an email address send email
        # if syslog is set do syslog
        # if echo is set dump on the screen
        ######################################################

        # One-line subject (multi-line subjects are an Outlook problem).
        # MESSAGE carries literal "\n" separators; printf '%s' passes them
        # through untouched so that sed can turn each one into a blank. An
        # echo that expands escapes would already have converted them to
        # real newlines, leaving sed nothing to replace.
        SUBJECT=$(printf '%s\n' "$MESSAGE" | sed 's/\\n/ /g')

        if test -n "$SYSLOG"
        then
                logger -p "$SYSLOG" "$SUBJECT"
        fi

        # printf '%b' expands the "\n" separators of MESSAGE into line
        # breaks. MESSAGE is passed as an argument, never as the format
        # string, so a "%" inside it cannot be misread as a conversion.
        if test -n "$LOGFILE"
        then
                {
                date
                printf '%b\n' "$MESSAGE"
                echo "###############################"
                } >> "$LOGFILE"
        fi

        if test -n "$DFADMIN"
        then
                printf '%b\n' "$MESSAGE" | mail -s "$SUBJECT" "$DFADMIN"
        fi

        if test -n "$ECHO"
        then
                date
                printf '%b\n' "$MESSAGE"
                echo "###############################"
        fi

        # what to do with BB ...
        # BIG BROTHER
        if test -n "$BB"
        then

                #
                # NOW SEND THIS INFORMATION TO THE BIG BROTHER DISPLAY UNIT
                #

                TEST="disk"

                # Machine name shown in the status text: MACHINE when the
                # environment provides it, otherwise this host's name.
                BBMACHINE=${MACHINE:-$HOST}

                # The colour follows the most severe level found in MESSAGE,
                # whose entries have the form " <fs> <level> <usage>" with
                # the level spelled "Fatal", "Warning" or "Info". LEVEL
                # itself is not used here: it only reflects the last entry
                # processed, not the worst one.
                case "$MESSAGE" in
                *" Fatal "*)
                        COLOR="RED"
                        INFO="- Disk partitions on $BBMACHINE in FATAL"
                        ;;
                *" Warning "*)
                        COLOR="YELLOW"
                        INFO="- Disk partitions on $BBMACHINE in WARNING"
                        ;;
                *)
                        COLOR="GREEN"
                        INFO="- Disk partitions on $BBMACHINE OK"
                        ;;
                esac

                # status line, a blank line, then the raw df/lsps output
                LINE="status $HOST.$TEST $COLOR $(date) $INFO

$(df -k; lsps -a)"

                $BB "$BBDISP" "$LINE"

        fi

fi

exit 0
# end dfk



This archive was generated by hypermail 2.1.7 : Wed Apr 09 2008 - 22:16:12 EDT