#!/usr/bin/ksh
##set -x
##-----------------------------------------------------------------------
##Filename: gg_proc_lag_check.sh
##Purpose: Check GGS processes and lag, and notify at a defined interval
#------------------------------------------------------------------------
## ---------------------------------------------------------------------
## Environment setup
## Usage: <script> ORACLE_SID REPEAT
##   ORACLE_SID  instance name looked up in /etc/oratab
##   REPEAT      integer compared (-ge) with the per-alert counter files
##               by the checks further down in this script
## ---------------------------------------------------------------------
## Load the invoking user's login profile.
. ~/.bash_profile
ORACLE_SID=$1
REPEAT=$2
## REPEAT feeds numeric -ge tests later on; a missing or non-numeric value
## would make those tests fail on every run, so reject it up front.
case $REPEAT in
  ''|*[!0-9]*)
    echo "Usage: $0 ORACLE_SID REPEAT (REPEAT must be a non-negative integer)" >&2
    exit 1
    ;;
esac
## Second colon-separated field of the matching /etc/oratab entry, blanks removed.
ORACLE_HOME=$(grep "^$ORACLE_SID:" /etc/oratab | cut -f2 -d: | tr -d '[:blank:]')
if [ -z "$ORACLE_HOME" ]; then
  echo "Unable to determin ORACLE_HOME in /etc/oratab. exit." >&2
  exit 1
fi
export GGS_HOME=/u03/golden/bin
PATH=${PATH}:${ORACLE_HOME}/bin:${GGS_HOME}
export ORACLE_SID
export ORACLE_HOME
export LD_LIBRARY_PATH=${ORACLE_HOME}/lib:${GGS_HOME}:/usr/dt/lib:/usr/ucblib
export SHLIB_PATH=${ORACLE_HOME}/lib:${GGS_HOME}
export PATH
## DL_LIST is passed to mailx as the -r (sender) address.
export DL_LIST='onstardbteam@hp.com'
## Recipients: dba_pagers.txt is comma separated, mailx takes blank-separated
## addresses. tmp_pagers.txt is still (re)written as before.
## NOTE(review): nothing in this script reads tmp_pagers.txt afterwards -
## confirm whether any other tooling depends on it.
rm -f "$HOME/scripts/tmp_pagers.txt"
sed -e 's/,/ /g' "$HOME/scripts/dba_pagers.txt" > "$HOME/scripts/tmp_pagers.txt"
## Assign first, export second: the value contains blanks and some shells
## word-split the arguments of export.
FAILURE_CONTACT=$(cat "$HOME/scripts/tmp_pagers.txt")
export FAILURE_CONTACT
if [ -z "$FAILURE_CONTACT" ]; then
  ## Without recipients every mailx call below would be pointless.
  echo "No recipients found in $HOME/scripts/dba_pagers.txt. exit." >&2
  exit 1
fi
export LOGDIR=/u03/golden/scripts/logs
export SCRIPTDIR=/u03/golden/scripts
export LOGFILE=ggs_check.log
export LOGFILE_LAG=ggs_lag.log
### LAG_CHK_MIN is the minimum replication/checkpoint lag threshold in minutes.
### Can be modified based on business needs.
LAG_CHK_MIN=60
##if [ ! -d ${LOGDIR} ]; then
## mkdir -p $LOGDIR
##fi
##ALL Process CheckUp
## Capture the output of GGSCI "info all" in ${LOGDIR}/${LOGFILE}; every
## check below parses that file.
## ggsci is started through a relative path, so a failed cd must stop the
## script: otherwise the status file ends up empty and no alert is ever sent.
cd "$GGS_HOME" || {
  echo "Unable to cd to GGS_HOME ($GGS_HOME). exit." >&2
  exit 1
}
./ggsci > "${LOGDIR}/${LOGFILE}" << EOF
info all
exit
EOF
##if stopped, send info, if abend, send error message in email

## gg_alert_due COUNTER_FILE REPEAT
## Reminder throttle. Returns 0 when an alert should be mailed on this run
## and 1 when it should be held back, updating COUNTER_FILE as it goes:
##   - no counter file     -> create it containing "1", alert
##   - counter >= REPEAT   -> remove the file, alert
##   - otherwise           -> increment the counter, stay quiet
## An empty or non-numeric counter is treated as 0 so a damaged file cannot
## disable reminders for good. An empty or non-numeric REPEAT falls back to 1
## (alert on every run) instead of silencing everything after the first mail.
## NOTE(review): because the file is removed when a reminder is sent, the
## very next run sees "no counter file" and alerts again; this cadence is
## inherited unchanged - confirm it is intended.
gg_alert_due() {
  _gad_file=$1
  _gad_repeat=$2
  case $_gad_repeat in
    ''|*[!0-9]*) _gad_repeat=1 ;;
  esac
  if [ ! -f "$_gad_file" ]; then
    echo "1" > "$_gad_file"
    return 0
  fi
  _gad_count=$(cat "$_gad_file")
  case $_gad_count in
    ''|*[!0-9]*) _gad_count=0 ;;
  esac
  if [ "$_gad_count" -ge "$_gad_repeat" ]; then
    rm -f "$_gad_file"
    return 0
  fi
  expr "$_gad_count" + 1 > "$_gad_file"
  return 1
}

## One "STATUS:GROUP" token per line of the status file whose 2nd column
## contains STOP or ABEND. Lines without a 3rd column (no group name) are
## skipped. Looking at the status column only keeps a group whose *name*
## happens to contain STOP/ABEND from being reported.
for gg_entry in $(awk 'toupper($2) ~ /STOP|ABEND/ && $3 != "" { print toupper($2) ":" $3 }' "${LOGDIR}/${LOGFILE}")
do
  gg_state=${gg_entry%%:*}
  f=${gg_entry#*:}
  ##there may be times some process stopped/abend, put those process names into
  ##the ggs_not_mon_proc.txt file and we will not get warning on them!
  ## -F -w: literal whole-word match, so EXT1 is not excluded just because
  ## EXT10 is listed.
  grep -F -w -- "$f" "${SCRIPTDIR}/ggs_not_mon_proc.txt" > /dev/null && continue
  case $gg_state in
    *STOP*)
      if gg_alert_due "${LOGDIR}/${f}_down_count" "$REPEAT"; then
        ## Mail body: the status line(s) of exactly this group.
        ## FAILURE_CONTACT is left unquoted on purpose: it holds several
        ## blank-separated recipients.
        awk -v p="$f" '$3 == p' "${LOGDIR}/${LOGFILE}" |
          mailx -s "GGS $(hostname) : $f Process DOWN" -r "$DL_LIST" $FAILURE_CONTACT
      fi
      ;;
    *ABEND*)
      if gg_alert_due "${LOGDIR}/${f}_abend_count" "$REPEAT"; then
        ## Mail body: ORA- lines of the group's report file, minus the
        ## "last error" lines.
        grep -i "ORA-" "$GGS_HOME/dirrpt/$f.rpt" | grep -v "last error" |
          mailx -s "GGS $(hostname) : $f Error" -r "$DL_LIST" $FAILURE_CONTACT
      fi
      ;;
  esac
done
## Every group whose status line contains RUNNING (any case) gets its
## down/abend reminder counters removed; rm output is discarded because the
## counters usually do not exist.
for running_proc in $(awk 'toupper($0) ~ /RUNNING/ { print $3 }' "${LOGDIR}/${LOGFILE}")
do
  for counter_kind in down abend
  do
    rm "${LOGDIR}/${running_proc}_${counter_kind}_count" > /dev/null 2>&1
  done
done
##If manager process down, send email!
## The grep pipeline is used directly as the `if` condition, so the else
## branch really depends on its result (a later "$?" would report the status
## of the [ ] test instead). Its output is discarded so the matched line does
## not leak into the script's stdout.
mgr_counter=${LOGDIR}/mgr_down_count
if grep -i 'STOP' "${LOGDIR}/${LOGFILE}" | grep -i "MANAGER" > /dev/null
then
  ## Reminder throttle: alert when the counter file is first created and
  ## again once the counter has reached REPEAT; count quietly in between.
  if [ ! -f "$mgr_counter" ]; then
    echo "1" > "$mgr_counter"
    mgr_alert=yes
  else
    mgr_count=$(cat "$mgr_counter")
    if [ "$mgr_count" -ge "${REPEAT}" ]; then
      rm -f "$mgr_counter"
      mgr_alert=yes
    else
      expr "$mgr_count" + 1 > "$mgr_counter"
      mgr_alert=no
    fi
  fi
  if [ "$mgr_alert" = yes ]; then
    ## FAILURE_CONTACT is left unquoted on purpose: it holds several
    ## blank-separated recipients.
    grep -i "MANAGER" "${LOGDIR}/${LOGFILE}" |
      mailx -s "GGS $(hostname) : Manager DOWN" -r "$DL_LIST" $FAILURE_CONTACT
  fi
else
  ## Manager is not reported as stopped: forget any pending reminder state.
  rm -f "$mgr_counter"
fi
##Check Process Lag!
##there may be times some process stopped/abend, put those process names into
##the ggs_not_mon_proc.txt file and we will not get warning on them!

## gg_lag_exceeds HH:MM:SS LIMIT_MINUTES
## Returns 0 when the HH:MM:SS value is strictly greater than LIMIT_MINUTES,
## 1 otherwise. The whole value is converted to seconds, so limits of 60
## minutes or more work as well (comparing the minute field alone can never
## exceed 59). awk does the arithmetic, which keeps fields such as "08" from
## being read as octal. Anything that is not three numeric fields separated
## by colons counts as "not exceeded".
gg_lag_exceeds() {
  printf '%s\n' "$1" | awk -F: -v limit="$2" '
    NF == 3 && $1 ~ /^[0-9]+$/ && $2 ~ /^[0-9]+$/ && $3 ~ /^[0-9]+$/ {
      over = ($1 * 3600 + $2 * 60 + $3 > limit * 60)
    }
    END { exit(over ? 0 : 1) }'
}

## gg_alert_due COUNTER_FILE REPEAT
## Reminder throttle. Returns 0 when an alert should be mailed on this run
## and 1 when it should be held back, updating COUNTER_FILE as it goes:
##   - no counter file     -> create it containing "1", alert
##   - counter >= REPEAT   -> remove the file, alert
##   - otherwise           -> increment the counter, stay quiet
## An empty or non-numeric counter is treated as 0; an empty or non-numeric
## REPEAT falls back to 1 (alert on every run).
## NOTE(review): because the file is removed when a reminder is sent, the
## very next run sees "no counter file" and alerts again; this cadence is
## inherited unchanged - confirm it is intended.
gg_alert_due() {
  _gad_file=$1
  _gad_repeat=$2
  case $_gad_repeat in
    ''|*[!0-9]*) _gad_repeat=1 ;;
  esac
  if [ ! -f "$_gad_file" ]; then
    echo "1" > "$_gad_file"
    return 0
  fi
  _gad_count=$(cat "$_gad_file")
  case $_gad_count in
    ''|*[!0-9]*) _gad_count=0 ;;
  esac
  if [ "$_gad_count" -ge "$_gad_repeat" ]; then
    rm -f "$_gad_file"
    return 0
  fi
  expr "$_gad_count" + 1 > "$_gad_file"
  return 1
}

## gg_lag_notify GROUP HH:MM:SS COUNTER_FILE SUBJECT_TAIL
## One threshold check for one group:
##   - value above LAG_CHK_MIN and group not listed in ggs_not_mon_proc.txt:
##     mail the full status file when the throttle says an alert is due
##   - value above LAG_CHK_MIN and group listed: do nothing
##   - value at or below LAG_CHK_MIN (or unreadable): remove COUNTER_FILE
gg_lag_notify() {
  _gln_proc=$1
  _gln_value=$2
  _gln_counter=$3
  _gln_tail=$4
  if gg_lag_exceeds "$_gln_value" "$LAG_CHK_MIN"; then
    ## -F -w: literal whole-word match, so EXT1 is not excluded just because
    ## EXT10 is listed; output is discarded.
    grep -F -w -- "$_gln_proc" "${SCRIPTDIR}/ggs_not_mon_proc.txt" > /dev/null && return 0
    if gg_alert_due "$_gln_counter" "$REPEAT"; then
      ## FAILURE_CONTACT is left unquoted on purpose: it holds several
      ## blank-separated recipients.
      mailx -s "GGS $(hostname) : $_gln_proc $_gln_tail" -r "$DL_LIST" $FAILURE_CONTACT < "${LOGDIR}/${LOGFILE}"
    fi
  else
    rm -f "$_gln_counter"
  fi
}

## Columns of an EXTRACT/REPLICAT status line: 3 = group name, 4 = value for
## the replication lag check, 5 = value for the checkpoint lag check.
grep -E -i 'EXTRACT|REPLICAT' "${LOGDIR}/${LOGFILE}" |
while read -r _gg_prog _gg_status proc_name lag_field chk_field _gg_rest
do
  ## A line without a group name cannot be tracked.
  [ -n "$proc_name" ] || continue
  ## Replication lagging check
  gg_lag_notify "$proc_name" "$lag_field" "${LOGDIR}/${proc_name}_lag_count" \
    "LAG Alert (> $LAG_CHK_MIN Minutes)"
  ## Checkpoint lagging check
  gg_lag_notify "$proc_name" "$chk_field" "${LOGDIR}/${proc_name}_chk_count" \
    "CHECKPOINT Alert (> $LAG_CHK_MIN minutes)"
done
exit
发表评论
点击:1647