#!/bin/ksh
#
# This program is actually two programs.  There are 2 subroutines in this
# program.  The first of which will monitor Oracle filesystems for
# Archive/Redo logs filling up.  Upon finding an exception condition, 
# the program will call a new version of itself with a command line
# option indicating that the second subroutine should be called to 
# actually archive the logs to tape.
#
###
#
# Modification History
#
# Date         Who         What
# -----------  ----------  -------------------------------------------------
# 1999-08-18   Bill V.     Initial writing
# 1999-08-25   Bill V.     Add some more calls to send_mail upon error
#                          conditions.  As well, as of this date, the
#                          script is, as far as I'm concerned, ready to go.
#                          Therefore, from this point forward, please
#                          document all script changes below.
# 1999-09-13   Bill V.     Lock file  - potential problem.  Change the >> to >
#                          when creating the lock file in archive()
# 1999-12-02   Bill V.     Add more pager phone numbers
#
#
#   #################################################################
#   #     The "global" routine is called first by this script.      #
#   #     This section is exactly that - "global".  It sets up      #
#   #     variables used for either aspect of this script.          #
#   #     The last line of this routine should always be the        #
#   #     "$host" command.  This calls a subroutine equal to the    #
#   #     machine's hostname.  More on that later...                #
#   #################################################################
global() {
   # Set the defaults shared by both the "monitor" and "archive" paths.
   # Every assignment is a plain global variable.  The host-specific
   # function invoked on the last line may override any of them.

   host=`hostname`
   boj=`date`
   # Command string, not a timestamp: it is executed later to get the time.
   stamp="date +%Y%m%d-%H%M%S"
   input_file="/etc/filesystems"
   log="/var/$host.ora_archive.log"
   lock_file="/var/db_mon.lck"
   touch "$log"
   cnt=0
   a=0
   myname="$0"
   mon_sw="monitor"
   arc_sw="archive"
   # "X" is the "not configured" sentinel that monitor() tests for.
   filesystems="X"
   thresholds="X"
   tape_catalog="tapecatalog.fifo"
   max_tapes=10
   mail_subj="Oracle Database log archive"
   tape_command="tar -cvf"
   nystedt_pgr=3129253008@mobile.att.net
   gliosci_pgr=3129253007@mobile.att.net
   yeazel_pgr=6302485270@mobile.att.net
   ariola_pgr=6302485274@mobile.att.net
   # "NULL" is the "no pager" sentinel that send_page() tests for.
   pager="NULL"
   # Default mail recipients.  Without this, a host that has no
   # configuration function reaches send_mail with an empty recipient
   # list, and the "not configured" notification cannot be delivered.
   # Same fallback addresses that send_page uses.
   mailto="root,oracle"

   # Run the function named after this machine's hostname so it can
   # override the defaults above.  If no such function exists the shell
   # reports "not found" and the defaults stay in effect.
   $host
}

#   #################################################################
#   #   $host specific information goes here.                       #
#   #   Mandatory items are filesystems, thresholds and device.     #
#   #   Optional items are - well - any global variables can be     #
#   #   overridden for specific hosts.                              #
#   #################################################################
gatchiuxmulp02() {
   # Host profile: overrides the defaults for the machine of this name.

   # Who gets notified.
   mailto='root,oracle'
   pager="${yeazel_pgr}, ${ariola_pgr}"

   # Output device handed to the tape command.
   device='/dev/rmt0'

   # Mount points to watch and their percent-used limits.  The two lists
   # are matched up by position, so they must have the same word count.
   thresholds='85   85    85'
   filesystems='/u02 /u03 /u04'
}

#   #################################################################
#   #   $host specific information goes here.                       #
#   #   Mandatory items are filesystems, thresholds and device.     #
#   #   Optional items are - well - any global variables can be     #
#   #   overriden for specific hosts.                               #
#   #################################################################
dwprod() {
   # Host profile: overrides the defaults for the machine of this name.
   # Neither filesystems nor thresholds is assigned here, so both keep
   # whatever value they held before this function was called.

   mailto='root,oracle'
   device='/dev/rmt0'
   pager="${yeazel_pgr}, ${ariola_pgr}"
}

#   #################################################################
#   #   log_it.  This subroutine sticks whatever is in the          #
#   #   variable "msg" into the script log file.                    #
#   #################################################################

log_it() {
   # Append the current value of $msg to the file named by $log.
   #
   # Messages throughout this script embed "\n" sequences.  %b expands
   # them no matter which echo implementation the running shell has, and
   # the quoted target keeps a log path with blanks in it as one word.
   printf '%b\n' "$msg" >> "$log"
}

#   #################################################################
#   #   send_mail.  This subroutine is a global mail routine.       #
#   #   Parameters required:                                        #
#   #   $subj = The subject of the e-mail                           #
#   #   $mailto = the recipients                                    #
#   #   $mailmsg = the body of the e-mail                           #
#   #   Note that after sending the e-mail, the subject and         #
#   #   body are reset to null.                                     #
#   #################################################################

send_mail() {
    # Log and send one e-mail.
    #   $subj    - subject line
    #   $mailmsg - message body
    #   $mailto  - recipients
    # $subj and $mailmsg are cleared afterwards so the next caller starts
    # from an empty message.

    # With no recipients the mail command has nobody to deliver to.
    # Fall back to the same addresses send_page uses when no pager is set.
    if [ -z "$mailto" ] ; then
       msg="No mail recipients defined for $host"
       log_it
       mailto="root,oracle"
    fi

    # Build the whole log entry first and write it once.  The body line
    # used to overwrite $msg instead of appending to it, which dropped
    # the subject from the log.
    msg="BEGIN MAIL MESSAGE\n"
    msg="${msg}MAIL SUBJECT: $subj\n"
    msg="${msg}MAIL MESSAGE BODY: $mailmsg\n"
    msg="${msg}MAIL RECEPIENTS: $mailto\n"
    msg="${msg}END MAIL MESSAGE\n"
    log_it

    # $mailto is deliberately unquoted so a blank-separated list is passed
    # as separate addresses.
    echo "$mailmsg" | mail -s "$subj" $mailto
    subj=""
    mailmsg=""
}
#   #################################################################
#   #   send_page:  Send a page to a pre-defined person.            #
#   #   The "pager" variable can be defined on a per-system basis.  #
#   #################################################################
   
send_page() {
   # Mail $page_msg to the address list in $pager, logging what is done.
   # The sentinel value "NULL" means no pager was configured; in that case
   # $pager is replaced with root,oracle (and stays replaced afterwards).
   case "$pager" in
      NULL)
         msg="No pager defined for $host"
         log_it
         pager="root,oracle"
         ;;
      *)
         msg="Sending message: $page_msg to pager: $pager"
         log_it
         ;;
   esac

   # $pager is left unquoted so a "a, b" list reaches mail as two words.
   echo "$page_msg" | mail $pager
}

#   #################################################################
#   #   clean_house:  General cleanup before EOJ                    #
#   #################################################################

clean_house() {
   # Remove the lock file and the two scratch files ($tmp and $tmp.1)
   # before the script ends.
   msg="Performing clean_house"
   log_it

   # Each name is checked before use.  An empty $tmp would otherwise turn
   # into a bare "rm" plus an attempt to delete a file literally named
   # ".1" in the current directory.
   # -f: a file that is already gone is not an error during cleanup.
   if [ -n "$lock_file" ] ; then
      rm -f "$lock_file"
   fi
   if [ -n "$tmp" ] ; then
      rm -f "$tmp" "$tmp.1"
   fi
}

#   #################################################################
#   #   monitor.  This module is one of two modules actually        #
#   #   called by the user.  The other is "archive".  More on that  #
#   #   later.                                                      #
#   #   This module is responsible for actually looking at the      #
#   #   archive spaces and seeing if we are above thresholds.       #
#   #   If we hit a threshold, this module will call a new copy     #
#   #   of the script with 2 command line options...                #
#   #   1.  "archive"                                               #
#   #   2.  The offending filesystem mount point                    #
#   #   The new version of the script then performs the "archive"   #
#   #   subroutine (more on that later...) and actually does the    #
#   #   archive.                                                    #
#   #################################################################
monitor() {
   # Check each configured filesystem against its threshold and start an
   # archive run for every one that is over.
   #
   # Reads:  $filesystems, $thresholds (blank-separated, matched by
   #         position), $stamp, $host
   # Exits:  2 when the host is not configured ($filesystems is "X")
   #         1 when the two lists differ in length
   # Returns normally once every filesystem has been examined.

   msg="##################   Begin processing - monitor"
   log_it

   if [ "$filesystems" = "X" ] ; then
      msg="\n$host is not configured for archiving\n"
      log_it
      subj="Archiving Configuration Error on $host"
      mailmsg="Archiving is currently not configured to run on host $host"
      send_mail
      exit 2
   fi

   # Count the words in each list; they have to agree.
   fs_count=0
   th_count=0
   for i in $filesystems ; do
      fs_count=`expr $fs_count + 1`
   done
   for i in $thresholds ; do
      th_count=`expr $th_count + 1`
   done

   if [ "$fs_count" -ne "$th_count" ] ; then
      msg="\nFilesystem and threshold array mismatch\n"
      log_it
      subj="Filesystem and threshold array mismatch $host"
      mailmsg="The filesystem array and threshold array are out of sync"
      send_mail
      exit 1
   fi

   cnt=1
   # Clearing the screen only makes sense with a terminal on stdout.
   if [ -t 1 ] ; then
      clear
   fi
   msg="\nTimestamp		Filesystem	Threshold	Currently"
   log_it
   while [ "$cnt" -le "$fs_count" ] ; do
      # The unquoted echo collapses runs of blanks so that cut can pick
      # the cnt-th word out of each list.
      filesystem=`echo $filesystems | cut -f$cnt -d" "`
      threshold=`echo $thresholds | cut -f$cnt -d" "`
      # Fourth column of the last df line, with the trailing % removed.
      crnt=`df -k "$filesystem" | awk '{print $4}' | tail -1 | cut -f1 -d%`

      # A failed df, or a fourth column that is not a number, used to fall
      # through the numeric test below and count as "under threshold".
      # Report it instead and move on to the next filesystem.
      case "$crnt" in
         ''|*[!0-9]*)
            msg="Unable to determine usage for $filesystem (df returned: '$crnt')"
            log_it
            subj="Filesystem usage check failed on $host"
            mailmsg="df -k did not return a numeric usage figure for $filesystem"
            send_mail
            cnt=`expr $cnt + 1`
            continue
            ;;
      esac

      # Strictly greater: sitting exactly on the threshold is not over it.
      if [ "$crnt" -gt "$threshold" ] ; then
         flag='*'
         arch_flag=1
      else
         flag=" "
         arch_flag=0
      fi
      s=`$stamp`
      msg="$s		$filesystem		$threshold		$crnt${flag}"
      log_it
      if [ "$arch_flag" -eq "1" ] ; then
         msg="$filesystem is over threshold..executing archive function"
         log_it
         # Re-run this script as a child process in archive mode.
         $0 archive "$filesystem"
      fi
      cnt=`expr $cnt + 1`
   done
   msg="##################   End processing - monitor"
   log_it
}

#   #################################################################
#   #   archive.  This is where all the magic occurs.               #
#   #   This routine is called by one of the 2 following            #
#   #   conditions:                                                 #
#   #   1.  The "monitor" routine above has found a filesystem      #
#   #       above its designated threshold                          #
#   #   2.  Someone has externally specified a filesystem to        #
#   #       perform a log archive.   Perhaps before a backup ?      #
#   #   This subroutine requires 2 and only 2 command line          #
#   #   options:                                                    #
#   #   1.  "archive"                                               #
#   #   2.  Filesystem to archive                                   #
#   #   Once this routine is called, a lock file is put in place    #
#   #   that will keep subsequent calls to this module from         #
#   #   stepping on each other and copying each other's log files.  #
#   #   If upon execution of this module, the lock file is found,   #
#   #   an entry is placed in the log, and execution aborts.        #
#   #   The contents of the lock file is the process-id of the      #
#   #   currently running archiver.                                 #
#   #   This processing depends on the following file structure..   #
#   #   {filesystem}/oradata/{sid}/arch.  Example:                  #
#   #      /u01/oradata/tpprod/arch.                                #
#   #   If there are multiple sid's in the oradata directory,       #
#   #   functionality of the script is not guaranteed.              #
#   #################################################################
archive() {
   # Archive the idle *.arc files under $filesystem/oradata/{sid}/arch.
   #
   # Reads:  $argc, $argv, $filesystem, $my_prcid, $lock_file,
   #         $tape_catalog, $max_tapes, $tape_command, $device,
   #         $mail_subj, $host
   # Exits:  4 wrong number of command line options
   #         3 lock file already present
   #         6 the archive directory could not be entered
   #         0 nothing to archive (no idle *.arc files)
   # Returns normally after logging and mailing the result.
   #
   # NOTE(review): as written, the tape command and the deletes are only
   # LOGGED, never executed (see the two notes further down).  That is
   # kept unchanged here; confirm whether this dry-run mode is intended.

   msg="##################   Begin processing - archive"
   log_it

   if [ "$argc" -ne "2" ] ; then
      msg="\nInvalid call to archive function (1)\n"
      log_it
      subj="Invalid command line option to archive function"
      mailmsg="$argv is not a valid command line for archiving function.\n"
      mailmsg="${mailmsg}2 Options are required:\n"
      mailmsg="${mailmsg}1.  monitor | archive\n"
      mailmsg="${mailmsg}2.  filesystem to archive\n"
      send_mail
      exit 4
   fi

   # An existing lock means another run is active or died without
   # cleaning up.  The file holds that run's process id.
   if [ -r "$lock_file" ] ; then
      locker=`cat "$lock_file"`
      msg="\nLock file found.  Check process # $locker\n"
      log_it
      subj="Lock file found during archiving process $host"
      mailmsg="It seems that either a second archiving function started, or\n"
      mailmsg="${mailmsg}perhaps the previous archiving function did not\n"
      mailmsg="${mailmsg}completed normally\n\n"
      mailmsg="${mailmsg}Check process id $locker"
      send_mail
      exit 3
   fi

   echo "$my_prcid" > "$lock_file"

   # Enter {filesystem}/oradata/{sid}/arch.  The sid is whatever the
   # oradata directory contains.  Each cd is checked: carrying on after a
   # failed cd would scan, and write the tape catalog into, whatever
   # directory the script happened to be started from.
   arch_dir_ok=0
   if cd "$filesystem/oradata" 2>/dev/null ; then
      sid=`ls`
      if [ -n "$sid" ] && cd "$sid/arch" 2>/dev/null ; then
         arch_dir_ok=1
      fi
   fi
   if [ "$arch_dir_ok" -ne "1" ] ; then
      # The lock was taken above and must not outlive this failed run.
      rm -f "$lock_file"
      msg="\nUnable to change to the archive directory under $filesystem/oradata\n"
      log_it
      subj="Archive directory not found on $host"
      mailmsg="Could not change to {filesystem}/oradata/{sid}/arch for\n"
      mailmsg="${mailmsg}filesystem $filesystem.  No logs were archived.\n"
      send_mail
      exit 6
   fi
   boj=`date`

   # The tape catalog is a per-sid hidden file in the arch directory that
   # holds the number of the last tape used.
   tmp=".${sid}_${tape_catalog}"
   tape_catalog="$tmp"

   if [ -r "$tape_catalog" ] ; then
      last_tape=`cat "$tape_catalog"`
   else
      # First run for this sid.  last_tape must be set here as well,
      # otherwise the expr below has nothing to add 1 to.
      last_tape=0
      echo "0" > "$tape_catalog"
   fi
   # A damaged catalog restarts the numbering instead of breaking expr.
   case "$last_tape" in
      ''|*[!0-9]*) last_tape=0 ;;
   esac
   this_tape=`expr $last_tape + 1`
   if [ "$this_tape" -gt "$max_tapes" ] ; then
      msg="`date`: Tape max of $max_tapes exceeded.  Reverting to 1\n"
      log_it
      subj="Tape catalog wrap around"
      mailmsg="Tape catalog on $host has wrapped around.\n"
      mailmsg="${mailmsg}This may or may not be an error."
      send_mail
      this_tape=1
   fi

   # Oldest first.  The redirect sits inside the substitution so that it
   # silences ls itself when no *.arc file exists.
   arch_files=`ls -tr *.arc 2>/dev/null`
   arch_filelist=""
   arch_cnt=0
   total=0
   tmp="/tmp/arch_tmp.$$"
   : > "$tmp"

   # Collect the fuser report (stdout and stderr) for every candidate.
   for arch_file in $arch_files ; do
      fuser "$arch_file" 1>>"$tmp" 2>&1
   done
   sort "$tmp" | uniq > "$tmp.1"
   # Turn each "name: pids" line into "name:0pids" with blanks removed.
   # A file nobody has open ends up as "name:0".
   sed -e "s/:/:0/g" -e "s/ //g" "$tmp.1" > "$tmp"

   for line in `cat "$tmp"` ; do
      arch_file=`echo "$line" | cut -f1 -d":"`
      arch_proc=`echo "$line" | cut -f2 -d":"`
      # Numerically zero means no process ids followed the colon.
      if [ "$arch_proc" -eq "0" ] ; then
         arch_cnt=`expr $arch_cnt + 1`
         arch_filelist="$arch_filelist${arch_file} "
      fi
      total=`expr $total + 1`
   done

   if [ "$arch_cnt" -eq "0" ] ; then
      clean_house
      exit 0
   fi

   msg="\nFiles to be archived: $arch_filelist ($arch_cnt of $total files)\n"
   log_it

   # NOTE(review): the tape command is written to the log, not run.  $rc
   # is therefore the status of the assignment and the error branch
   # below cannot trigger.  Left unchanged on purpose - confirm.
   msg="$tape_command $device $arch_filelist"
   rc=$?
   log_it

   if [ "$rc" -ne "0" ] ; then
      msg="I/O error on archive procedure"
      log_it
      subj="I/O error on $sid archive for $host"
      mailmsg="Oracle archive failed with return code: $rc\n"
      mailmsg="${mailmsg}Tape number is: $this_tape\n"
      send_mail
      page_msg="`date`: I/O error on archive write $host"
      send_page
      purge=0
   else
      purge=1
      echo "$this_tape" > "$tape_catalog"
   fi

   subj="$mail_subj for $host (SID=$sid)"
   if [ "$rc" -ne "0" ] ; then
      subj="ERROR WHILE PERFORMING $mail_subj for $host (SID=$sid)"
   fi
   mailmsg="Oracle archive log for host: $host, SID: $sid.\n"
   mailmsg="${mailmsg}The following log files were archived:\n"
   for i in $arch_filelist ; do
      mailmsg="${mailmsg}`ls -l $i`\n"
   done
   # Blank line between the file listing and the totals.  The "\n" used
   # to be prepended to the whole body instead.
   mailmsg="${mailmsg}\nNumber of files archived: $arch_cnt.\n"
   mailmsg="${mailmsg}Number of files searched: $total.\n"
   mailmsg="${mailmsg}Output device: $device, tape number: $this_tape\n"
   mailmsg="${mailmsg}Processing as of $boj\n"
   send_mail
   if [ "$purge" -eq "1" ] ; then
      # NOTE(review): only logs what would be deleted; nothing is removed.
      for file in $arch_filelist ; do
         msg="(delete) command for $filesystem/oradata/$sid/arch/$file"
         log_it
      done
   fi
   clean_house
   msg="##################   End processing - archive"
   log_it

}

#   #################################################################
#   #   Main processing.  This is where we call the "global"        #
#   #   subroutine.  We interrogate the command line options,       #
#   #   and call the appropriate subroutine.                        #
#   #################################################################

# Capture the command line and this shell's process id up front, so the
# functions below can see them as ordinary global variables.
my_prcid="$$"
argv="$*"
argc="$#"
option="$1"

# The second word names the filesystem, but only in the two-option form.
case "$argc" in
   2) filesystem="$2" ;;
esac

# Load the defaults and this host's overrides.
global

# Dispatch on the first word.  Both recognised options end the script
# with status 0 once their function returns.
case "$option" in
   "$mon_sw")
      monitor
      exit 0
      ;;
   "$arc_sw")
      archive
      exit 0
      ;;
esac

# Anything else: report it on stdout and in the log, then exit 5.
msg="\nInvalid option entered ($argv) ($argc)\n"
echo "$msg"
log_it
exit 5
