#! /bin/bash
# Global state shared by the functions below. Most of these variables are
# initialised empty here and filled in later by the functions or by the
# command-line option parsing at the bottom of the file.
export LANG=en_US.UTF-8
#WARNLIMIT=-1 # warning threshold: a value above it is treated as a warning
#ERRORLIMIT=-1 # error threshold: a value above it is treated as an error
FLAGE=
FLAGEID=
# Whether to output FLAGPID
FLAGEOPID=1
# Whether to output RUNTIME
RUNTIME=1
# Whether to output STARTTIME
STARTTIME=0
CONN=0 # whether to check the connection count
CONNMIN=
CONNMAX=
Reading=0 # original comment said "whether to check the connection count"; presumably toggles the Reading metric — TODO confirm
ReadingMIN=
ReadingMAX=
Writing=0 # original comment said "whether to check the connection count"; presumably toggles the Writing metric — TODO confirm
WritingMIN=
WritingMAX=
Waiting=0 # original comment said "whether to check the connection count"; presumably toggles the Waiting metric — TODO confirm
WaitingMIN=
WaitingMAX=
MEMORY=0 # whether to check memory
MEMORYMIN=
MEMORYMAX=
CPU=0 # whether to check CPU
CPUMIN=
CPUMAX=
CURNUM= # current metric value (scratch variable reused by every collector)
WranResultStr= # final warning message string
ErrorResultStr= # final error message string
DataResultStr= # performance-data string
NGINXSTA=
CORENUM=1
PORT=
quota=
QUOTASTR=
nginx_acquisition_json=""
#######################################
# Print the status line that matches a state code, then terminate the script.
# Globals:
#   quota, QUOTASTR, WranResultStr, ErrorResultStr, DataResultStr (read)
# Arguments:
#   $1 - state code: 0 recovered, 1 warning, 2 error,
#        3 PID could not be determined, 4 process is a zombie
# Outputs:
#   Writes one status line to stdout (nothing for an unknown code).
# Returns:
#   Never returns. Exits with status 3 for code 4 and status 1 otherwise.
#######################################
function gotErr(){
  local state="${1:-}"
  case "$state" in
    0)
      echo "mailstatedes=各项指标恢复正常 statedes=各项指标恢复正常|${DataResultStr}"
      ;;
    1)
      echo "quota=${quota} mailstatedes=${WranResultStr} statedes=${WranResultStr} ${QUOTASTR} |${DataResultStr}"
      ;;
    2)
      # Prefix the error text with the warning text when both are present.
      if [[ -n "${WranResultStr}" ]]; then
        echo "quota=${quota} mailstatedes=${WranResultStr},${ErrorResultStr} statedes=${WranResultStr},${ErrorResultStr}${QUOTASTR}|${DataResultStr}"
      else
        echo "quota=${quota} mailstatedes=${ErrorResultStr} statedes=${ErrorResultStr} ${QUOTASTR}|${DataResultStr}"
      fi
      ;;
    3)
      echo "无法获取到组件PID,疑似组件故障,请确认并请检查监控脚本和运维平台配置"
      ;;
    4)
      echo "该组件进程为僵尸进程,请确认并请检查该组件状态"
      exit 3
      ;;
  esac
  exit 1
}
#######################################
# Check that a process is listening on PORT and that it is not a zombie.
# Globals:
#   PORT (read)
#   CORENUM - set to the number of "physical id" lines in /proc/cpuinfo
#   FLAGEID - set to the PID of the first process listed for the socket
#   ZOMBIE  - set to the matching `ps` row when the process is a zombie
# Arguments:
#   None
# Returns:
#   Returns normally when the process is alive. Otherwise calls gotErr 3
#   (no PID found) or gotErr 4 (zombie), which terminate the script.
#######################################
function isAlive(){
  # grep -c prints 0 when nothing matches; fall back to 0 when the file is
  # unreadable so later numeric tests on CORENUM never see an empty string.
  CORENUM=$(grep -c "physical id" /proc/cpuinfo 2>/dev/null)
  CORENUM=${CORENUM:-0}

  # Column 6 of `ss -lnpt` is the process column, e.g.
  #   users:(("nginx",pid=7140,fd=6))
  # and its second comma-separated item carries the PID.
  FLAGEID=$(ss -lnpt "( sport == :${PORT} )" \
    | tail -n 1 \
    | awk '{print $6}' \
    | awk -F ',' '{print $2}')
  # Newer ss versions print "pid=7140"; keep only the number so the value can
  # be compared against `ps` output and emitted as a JSON number.
  FLAGEID=${FLAGEID#pid=}
  if [[ -z "$FLAGEID" ]]; then
    gotErr 3
  fi

  # Compare the PID column exactly so that PID 714 does not match 7140.
  ZOMBIE=$(ps -A -o stat,pid \
    | awk -v pid="$FLAGEID" '$1 ~ /^[Zz]/ && $2 == pid')
  if [[ -n "$ZOMBIE" ]]; then
    gotErr 4
  fi
}
#######################################
# Download the nginx status page and record the active connection count.
# Globals:
#   NGINXSTA (read) - URL of the nginx status page
#   CURNUM (written) - connection count, 0 when it cannot be determined
#   nginx_acquisition_json (appended) - gains ,'ConnectionNum':<n>
# Arguments:
#   None
# Side effects:
#   Recreates ./nginx-status (page body) and ./check_nginx_temp (wget log) in
#   the current directory; getReading/getWriting/getWaiting read ./nginx-status.
#######################################
function getConnNum(){
  rm -rf -- nginx-status check_nginx_temp
  timeout 15 wget "$NGINXSTA" -O nginx-status -o check_nginx_temp
  # First line looks like "Active connections: 291 "; take the text after the
  # colon with all blanks removed.
  CURNUM=$(awk -F ':' 'NR == 1 { gsub(/ /, "", $2); print $2 }' nginx-status 2>/dev/null)
  # The value is emitted unquoted into JSON, so anything that is not a plain
  # number (empty download, HTML error page) is reported as 0.
  if [[ ! "$CURNUM" =~ ^[0-9]+$ ]]; then
    CURNUM=0
  fi
  nginx_acquisition_json="${nginx_acquisition_json},'ConnectionNum':${CURNUM}"
}
#######################################
# Record how long the monitored process has been running.
# `ps -eo pid,etime` lists every PID with its elapsed time.
# Globals:
#   FLAGEID (read) - PID of the monitored process
#   CURNUM (written) - elapsed time as printed by ps, empty if not found
#   nginx_acquisition_json (appended) - gains ,'RUNTIME':'<etime>'
# Arguments:
#   None
#######################################
function getRuntime(){
  # Tolerate a "pid=1234" style value and pass the PID to awk as data rather
  # than splicing it into the awk program text.
  local pid="${FLAGEID#pid=}"
  CURNUM=$(ps -eo pid,etime | awk -v pid="$pid" '$1 == pid { print $2 }')
  nginx_acquisition_json="${nginx_acquisition_json},'RUNTIME':'${CURNUM}'"
}
#######################################
# Record the start time of the monitored process.
# `ps -eo pid,lstart` lists every PID with its full start timestamp.
# Globals:
#   FLAGEID (read) - PID of the monitored process
#   CURNUM (written) - the five lstart fields joined by single spaces,
#                      empty if the PID is not found
#   nginx_acquisition_json (appended) - gains ,'STARTTIME':'<timestamp>'
# Arguments:
#   None
#######################################
function getStarttime(){
  # Tolerate a "pid=1234" style value and pass the PID to awk as data rather
  # than splicing it into the awk program text.
  local pid="${FLAGEID#pid=}"
  CURNUM=$(ps -eo pid,lstart \
    | awk -v pid="$pid" '$1 == pid { print $2, $3, $4, $5, $6 }')
  nginx_acquisition_json="${nginx_acquisition_json},'STARTTIME':'${CURNUM}'"
}
#######################################
# Record the "Reading" connection count from the downloaded status page.
# Globals:
#   CURNUM (written) - Reading count, 0 when it cannot be determined
#   nginx_acquisition_json (appended) - gains ,'Reading':'<n>'
# Arguments:
#   None
# Inputs:
#   ./nginx-status, whose summary line looks like
#   "Reading: 6 Writing: 179 Waiting: 106"
#######################################
function getReading(){
  # Select the summary line by its label instead of relying on it being the
  # very last line, so a trailing blank line does not hide the value.
  CURNUM=$(awk '$1 == "Reading:" { print $2; exit }' nginx-status 2>/dev/null)
  if [[ ! "$CURNUM" =~ ^[0-9]+$ ]]; then
    CURNUM=0
  fi
  nginx_acquisition_json="${nginx_acquisition_json},'Reading':'${CURNUM}'"
}
#######################################
# Record the "Writing" connection count from the downloaded status page.
# Globals:
#   CURNUM (written) - Writing count, 0 when it cannot be determined
#   nginx_acquisition_json (appended) - gains ,'Writing':'<n>'
# Arguments:
#   None
# Inputs:
#   ./nginx-status, whose summary line looks like
#   "Reading: 6 Writing: 179 Waiting: 106"
#######################################
function getWriting(){
  # Select the summary line by its label instead of relying on it being the
  # very last line, so a trailing blank line does not hide the value.
  CURNUM=$(awk '$1 == "Reading:" { print $4; exit }' nginx-status 2>/dev/null)
  if [[ ! "$CURNUM" =~ ^[0-9]+$ ]]; then
    CURNUM=0
  fi
  nginx_acquisition_json="${nginx_acquisition_json},'Writing':'${CURNUM}'"
}
#######################################
# Record the "Waiting" connection count from the downloaded status page.
# Globals:
#   CURNUM (written) - Waiting count, 0 when it cannot be determined
#   nginx_acquisition_json (appended) - gains ,'Waiting':'<n>'
# Arguments:
#   None
# Inputs:
#   ./nginx-status, whose summary line looks like
#   "Reading: 6 Writing: 179 Waiting: 106"
#######################################
function getWaiting(){
  # Select the summary line by its label instead of relying on it being the
  # very last line, so a trailing blank line does not hide the value.
  CURNUM=$(awk '$1 == "Reading:" { print $6; exit }' nginx-status 2>/dev/null)
  if [[ ! "$CURNUM" =~ ^[0-9]+$ ]]; then
    CURNUM=0
  fi
  nginx_acquisition_json="${nginx_acquisition_json},'Waiting':'${CURNUM}'"
}
#######################################
# Record the summed memory share of all nginx processes.
# Column 4 of `ps aux` is %MEM; the values of every row that mentions both
# "nginx" and "process" are added together.
# Globals:
#   CURNUM (written) - summed %MEM, 0 when no row matches
#   nginx_acquisition_json (appended) - gains ,'Memory':<sum>
# Arguments:
#   None
#######################################
function getMemory(){
  CURNUM=$(ps aux \
    | grep nginx \
    | grep process \
    | awk '{ sum += $4 } END { print sum + 0 }')
  nginx_acquisition_json="${nginx_acquisition_json},'Memory':${CURNUM}"
}
#######################################
# Record the CPU share of all nginx processes, averaged over CORENUM.
# Column 3 of `ps aux` is %CPU; the values of every row that mentions both
# "nginx" and "process" are added together.
# Globals:
#   CORENUM (read) - divisor; when it is 0 or empty the raw sum is reported
#   CURNUM (written) - sum / CORENUM with two decimals, or the raw sum
#   nginx_acquisition_json (appended) - gains ,'CPU':<value>
# Arguments:
#   None
#######################################
function getCPU(){
  # The division is done inside awk so the script does not depend on `bc`.
  # Note that awk rounds to two decimals where `bc` with scale=2 truncated.
  CURNUM=$(ps aux \
    | grep nginx \
    | grep process \
    | awk -v cores="${CORENUM:-0}" '
        { sum += $3 }
        END {
          if (cores + 0 != 0) printf "%.2f", sum / cores
          else print sum + 0
        }')
  nginx_acquisition_json="${nginx_acquisition_json},'CPU':${CURNUM}"
}
#######################################
# Collect every metric by calling the individual collectors in a fixed order.
# Each collector appends one ,'key':value pair to nginx_acquisition_json.
# Globals:
#   FLAGEID (read) - PID of the monitored process
#   NGINXSTA (read) - status page URL; when it is empty or the literal
#                     placeholder "<<url>>" the Reading/Writing/Waiting
#                     values are reported as empty strings
#   CURNUM (written), nginx_acquisition_json (appended)
# Arguments:
#   None
#######################################
function analysisStat(){
  # Tolerate a "pid=1234" style value so the JSON value is a bare number.
  CURNUM="${FLAGEID#pid=}"
  nginx_acquisition_json="${nginx_acquisition_json},'FLAGEPID':${CURNUM}"
  getRuntime
  getConnNum
  getMemory
  getCPU
  getStarttime

  if [[ -z "${NGINXSTA}" || "${NGINXSTA}" == "<<url>>" ]]; then
    # No usable status URL: emit the three keys with empty values.
    CURNUM=""
    local metric
    for metric in Reading Writing Waiting; do
      nginx_acquisition_json="${nginx_acquisition_json},'${metric}':'${CURNUM}'"
    done
    return
  fi

  getReading
  getWriting
  getWaiting
}
#######################################
# Assemble the collected metrics into one JSON object and print it.
# Globals:
#   nginx_acquisition_json (read) - ",'key':value" pairs gathered so far
#   result (written) - the final JSON text
# Arguments:
#   None
# Outputs:
#   Writes the JSON object followed by a newline to stdout.
#######################################
function analysisResult(){
  local single_quote="'"
  local double_quote='"'
  # ${var#*,} drops everything up to and including the first comma, i.e. the
  # leading separator of the accumulated pair list.
  result="{'cpname':'nginx',${nginx_acquisition_json#*,}}"
  # The pairs are built with single quotes; JSON requires double quotes.
  result="${result//$single_quote/$double_quote}"
  # Quoted printf keeps whitespace inside values intact and cannot glob.
  printf '%s\n' "$result"
}
#######################################
# Print the usage text and terminate the script.
# Arguments:
#   $1 - optional name of a missing parameter; when given, a prompt asking
#        for it is printed before the usage text
# Outputs:
#   Writes the prompt (if any) and the usage text to stdout.
# Returns:
#   Never returns. Always exits with status 1.
#######################################
function showHelp(){
  local missing="${1:-}"
  if [[ -n "$missing" ]]; then
    echo "请输入${missing}"
  fi
  echo "check_naginx.sh 可以监听本地的nginx的状态 参数如下"
  echo "check_naginx.sh [-p <<port>>] [-u <<url>>]"
  echo "-p 表示nginx-status的端口"
  echo "-u 表示nginx-status的地址"
  exit 1
}
# Parse the command-line options:
#   -p <port>  port the monitored nginx listens on (stored in PORT)
#   -u <url>   URL of the nginx status page (stored in NGINXSTA)
#   -h         print the usage text and exit
# Any unrecognised option also prints the usage text and exits.
while getopts "u:p:h" arg; do
  case "$arg" in
    p)
      PORT="$OPTARG"
      ;;
    u)
      NGINXSTA="$OPTARG"
      ;;
    h)
      showHelp
      ;;
    *)
      showHelp
      ;;
  esac
done
# The status page URL is mandatory.
if [[ -z "$NGINXSTA" ]]; then
  showHelp "nginx-status的地址"
fi
# Main part of the plugin. The three steps run in this order:
# 1. isAlive        - confirm the service is alive
# 2. analysisStat   - collect the metrics
# 3. analysisResult - print the result
isAlive
analysisStat
analysisResult
# Usage:
#   [root@ja ~]# bash check_nginx.sh -p 80 -u 127.0.0.1
#   {"cpname":"nginx","FLAGEPID":pid=7140,"RUNTIME":"","ConnectionNum":0,"Memory":4,"CPU":0.00,"STARTTIME":"","Reading":"0","Writing":"0","Waiting":"0"}