当用户抱怨数据库缓慢时，除了检查数据库本身的状态以外，还需要检查 UNIX 操作系统的资源使用情况，确保系统本身没有性能瓶颈。
这是我以前工作中编写的 AIX 系统资源利用率检查小工具，可以快速显示系统的负载情况，如有异常会给出提示。功能包括：
# Check CPU usage:
# Check Memory usage:
# Check Disk usage:
# Check DISK IO, iostat:
# Check Network:
# Check system error report:
# Top 10 CPU process:
#!/bin/sh
# config file check
# Path of the settings file that check_source loads.
SOURCE=$HOME/config/config

# check_source - source the settings file named by $SOURCE into the current shell.
# Globals:  SOURCE (read) - path of the settings file
# Outputs:  an error message on stderr when the file is not readable
# Returns:  only returns on success; otherwise exits the script with status 1
check_source()
{
    # Quote the path so a location containing spaces is tested and sourced
    # as a single word instead of breaking the [ -r ... ] test.
    if [ -r "$SOURCE" ]; then
        . "$SOURCE"
    else
        # Diagnostics go to stderr so they never mix with report output.
        echo "$(basename "$0"): Cannot locate the default setting file." >&2
        exit 1
    fi
}
# report header
# report_header - print the four-line report banner on stdout.
# Side effect: assigns the globals HOSTIP, HOSTNAME and USER.
report_header()
{
    # Second field of the second line printed by `ifconfig -a`.
    HOSTIP=$(ifconfig -a | awk 'NR == 2 { print $2 }')
    HOSTNAME=$(hostname)
    # The complete `id` output is used as the user description
    # (an earlier `who am i | cut -d " " -f1` variant is disabled).
    USER=$(id)
    # The expansions stay unquoted on purpose: echo re-joins the words with
    # single spaces, which is the layout the report has always had.
    echo Hostname: $HOSTNAME Server: $HOSTIP
    echo User: $USER Time: $(date '+%Y-%m-%d %H:%M:%S')
    printf '%s\n' "SYSTEM CHECK REPORT" "==================="
}
# LOG_PATH and LOG_FILE
# The report goes to one fixed file; a timestamped name is kept below as a
# disabled alternative.
LOG_PATH=/tmp
#LOG_FILE=$LOG_PATH/log_aix_`date +%Y%m%d%H%M%S`
LOG_FILE=$LOG_PATH/aix.log
# Directory that receives the intermediate files written by the checks.
TEMP_PATH=/tmp

# Make sure both directories exist before anything is written to them.
# mkdir -p also creates missing parents; a failure aborts the run because
# every later redirection would fail anyway.
for _dir in "$LOG_PATH" "$TEMP_PATH"; do
    if [ ! -d "$_dir" ]; then
        mkdir -p "$_dir" || {
            echo "$(basename "$0"): cannot create directory $_dir" >&2
            exit 1
        }
    fi
done
unset _dir
#mkdir -p $LOG_PATH/bak

# Moving old log_aix_* files into $LOG_PATH/bak is disabled:
#mv $LOG_PATH/log_aix_* $LOG_PATH/bak/ >/dev/null 2>&1
# The former `test -f $LOG_PATH/log_aix_*` probe only selected between two
# no-op branches and raised an error whenever the glob matched more than one
# file, so it is no longer executed.

cd "$LOG_PATH" || {
    echo "$(basename "$0"): cannot change directory to $LOG_PATH" >&2
    exit 1
}
# load config
# Loading settings through check_source is currently disabled; the thresholds
# are set inline instead.
#check_source
# Thresholds the CPU usage and the df %Used figures are compared against.
CPU_VALUE=80
DISK_VALUE=88
os=$(uname)
# Anything other than AIX stops the script here, with exit status 0.
case "$os" in
    AIX | aix)
        ;;
    *)
        echo 'Sorry only AIX platform supported !'
        exit 0
        ;;
esac
# report header output
# Starts a fresh report: this redirection truncates any previous $LOG_FILE.
report_header >"$LOG_FILE"
# Check CPU usage:
echo "***************************************** Check CPU *****************************************">>"$LOG_FILE"
# Number of one-second vmstat samples to take.
cnt=5
# Pass the vmstat output through unchanged and append one summary line.
# Column 16 is presumably the idle percentage ("id") of AIX vmstat, so
# 100 - mean(column 16) is the usage. The mean is taken over the data lines
# actually seen and rounded to a whole number, because the shell test below
# can only compare integers. No summary line is written when vmstat produced
# no data lines.
vmstat 1 "$cnt" | awk '
    { print }
    $1 ~ /^[0-9]/ { idle += $16; samples++ }
    END {
        if (samples > 0)
            printf "The average cpu usage is :%d\n", 100 - idle / samples + 0.5
    }' >"$TEMP_PATH/cpu_info"
cat "$TEMP_PATH/cpu_info" >>"$LOG_FILE"
# Logical CPU count taken from the "lcpu=N" token in the saved vmstat output.
cpu_count=$(sed -n 's/.*lcpu=\([0-9][0-9]*\).*/\1/p' "$TEMP_PATH/cpu_info")
cpu_used_pct=$(awk -F ':' '/The average cpu usage is/ { print $2 }' "$TEMP_PATH/cpu_info")
case "$cpu_used_pct" in
    '' | *[!0-9]*)
        # No usable figure (vmstat failed or printed an unexpected layout):
        # say so instead of letting a broken comparison report "OK".
        echo " Warning: $(date '+%Y-%m-%d %H:%M:%S'), The CPU usage could not be read from vmstat, please check cpu!">>"$LOG_FILE"
        ;;
    *)
        if [ "$cpu_used_pct" -gt "$CPU_VALUE" ]; then
            echo " Warning: $(date '+%Y-%m-%d %H:%M:%S'), The CPU usage is $cpu_used_pct%>$CPU_VALUE, please check cpu!">>"$LOG_FILE"
        else
            echo " The $cpu_count CPU load is OK, $cpu_used_pct<$CPU_VALUE%">>"$LOG_FILE"
        fi
        ;;
esac
# Check Memory usage:
echo >>"$LOG_FILE"
echo >>"$LOG_FILE"
echo "***************************************** check memory useage *****************************************">>"$LOG_FILE"
# Memory size as printed in the saved vmstat output: the text following
# "mem=" (assumed to look like "8192MB" -- confirm against the local vmstat).
mem_size=$(sed -n 's/.*mem=\([^ ]*\).*/\1/p' "$TEMP_PATH/cpu_info")
# Numeric part in front of the "M", then multiplied by 1024. The variable
# keeps its historical name although it ends up holding the scaled value.
mem_size_m=${mem_size%%M*}
case "$mem_size_m" in
    '' | *[!0-9]*) mem_size_m=0 ;;
    *) mem_size_m=$((mem_size_m * 1024)) ;;
esac
#echo $mem_size $mem_size_m
# Average the vmstat data lines (first field numeric) only:
#   column 3 * 4 / memory size -> mem_usage fraction (presumably avm in 4 KB pages)
#   columns 6 and 7            -> page-in / page-out averages
# Headers and the CPU summary line are ignored for all three sums. Exactly
# one verdict line is always written, so values that sit exactly on a
# threshold are reported as a warning rather than dropped.
awk -v mem="$mem_size" -v mem_kb="$mem_size_m" '
    $1 ~ /^[0-9]/ { avm += $3; pagein += $6; pageout += $7; samples++ }
    END {
        if (samples == 0 || mem_kb + 0 <= 0) {
            print "Warning: The memory usage could not be read from vmstat, please check memory usage!"
            exit
        }
        mem_usage = avm * 4 / mem_kb / samples
        pagein = pagein / samples
        pageout = pageout / samples
        if (pagein < 1 && pageout < 1 && mem_usage < 0.88)
            print " The " mem " memory usage is OK, mem_usage:" mem_usage " totalpi:" pagein ",totalpo:" pageout
        else
            print "Warning: The memory " mem " mem_usage:" mem_usage " pagein:" pagein " and pageout:" pageout " is high, please check memory usage!"
    }' "$TEMP_PATH/cpu_info" >>"$LOG_FILE"
# Check Disk usage:
echo >>"$LOG_FILE"
echo >>"$LOG_FILE"
echo "***************************************** check disk space *****************************************">>"$LOG_FILE"
df -k >>"$LOG_FILE"
# Save the first seven df columns as comma-separated records, leaving out the
# header line and every line that mentions "proc".
df -k | grep -v proc | grep -v Filesystem \
    | awk '{ print $1 "," $2 "," $3 "," $4 "," $5 "," $6 "," $7 }' >"$TEMP_PATH/disk_info"
# Evaluate every record in a single awk pass instead of spawning several
# processes per filesystem. Field 4 is the usage percentage and field 7 the
# name shown in the message. Records whose usage is not a plain number (for
# example "-") are skipped, because they cannot be compared and previously
# produced a test error followed by a misleading "OK" line.
awk -F ',' -v limit="$DISK_VALUE" '
    /^#/ { next }
    {
        used = $4
        sub(/%.*/, "", used)
        if (used !~ /^[0-9]+$/)
            next
        if (used + 0 > limit + 0)
            printf " Warning: %s usage is %s%% > %s !\n", $7, used, limit
        else
            printf " The disk %s is OK, %s%% < %s\n", $7, used, limit
    }' "$TEMP_PATH/disk_info" >>"$LOG_FILE"
# Check DISK IO, iostat:
# Only the section banner is written; the iostat sampling itself is disabled.
{
    echo
    echo
    echo "***************************************** check iostat *****************************************"
    # iostat 1 $cnt
} >>"$LOG_FILE"
# Check Network:
# Appends the interface table printed by `netstat -i` below its banner.
{
    echo
    echo
    echo "***************************************** check netstat *****************************************"
    netstat -i
} >>"$LOG_FILE"
# Check system error report:
#
echo >>"$LOG_FILE"
echo >>"$LOG_FILE"
echo "***************************************** check system err *****************************************">>"$LOG_FILE"
errpt | head -10 >>"$LOG_FILE"
# The second errpt column is a timestamp documented as MMDDhhmmYY. Matching
# only the leading MMDD would also flag entries logged on the same calendar
# day in earlier years, so the trailing two-digit year is checked as well.
# One date call supplies both parts so they cannot straddle midnight.
stamp=$(date +%m%d%y)
day=${stamp%??}
year=${stamp#????}
if errpt | awk '{ print $2 }' | grep "^${day}[0-9][0-9][0-9][0-9]${year}\$" >/dev/null; then
    echo " Warning: The system hase error on $(date '+%Y-%m-%d %H:%M:%S'), please check error by errpt!" >>"$LOG_FILE"
    # Also show the newest entries directly on the console.
    errpt | head -10
else
    echo >>"$LOG_FILE"
    echo " No system error reported today, system is OK." >>"$LOG_FILE"
fi
# Console summary: every report line containing "OK" or "!", except the lines
# that mention "disk".
grep -E "OK|!" "$LOG_FILE" | grep -v disk
# Take one process snapshot and reuse it, so the header and both rankings
# describe the same moment. The header is removed by position (first line)
# rather than by filtering on "PID", which would also hide any process whose
# command line happens to contain that text.
ps_snapshot=$(ps auxww)
echo " Top 10 CPU process:"
printf '%s\n' "$ps_snapshot" | head -1
# Highest values of column 3 first.
printf '%s\n' "$ps_snapshot" | sed 1d | sort -rnk3 | head -10
echo " Top 10 Memory process:"
printf '%s\n' "$ps_snapshot" | head -1
# Highest values of column 4 first.
printf '%s\n' "$ps_snapshot" | sed 1d | sort -rnk4 | head -10
# Disabled extras:
#echo Pagespace usage:
#lsps -a
#echo Snapshot of virtual memory:
#svmon