
#! /bin/bash
# Export LANG so that every command started by this script sees en_US.UTF-8.
export LANG="en_US.UTF-8"
#WARNLIMIT=-1   # warning threshold: a value above it should raise an alert
#ERRORLIMIT=-1  # error threshold: a value above it is treated as an error

# Current performance metric value (scratch variable shared by the functions).
CURNUM=""

# Whether to output the process run time.
RUNTIME=1
# Whether to output the process start time.
STARTTIME=1

# Result strings assembled while metrics are evaluated.
WranResultStr=""   # final warning message
ErrorResultStr=""  # final error message
DataResultStr=""   # performance data string

# Where mongostat lives and how to reach the monitored instance.
BASEDIR="/usr/local/mongodb/bin/"
MONGOHOST="127.0.0.1"
USER=""
PASSWORD=""
PORT=27021

# Per-metric switch plus warning (MIN) and error (MAX) thresholds.
FAULTS=-1; FAULTSMIN=-1; FAULTSMAX=-1
RES=-1;    RESMIN=-1;    RESMAX=-1
CONN=-1;   CONNMIN=-1;   CONNMAX=-1

# Alert bookkeeping and the accumulated key/value fragment of the JSON output.
QUOTASTR=""
quota=""
mongos_acquisition_json=""
# Print the status line that belongs to state code $1 and exit with that code.
#   0  all metrics back to normal
#   1  warning   (uses quota, WranResultStr, QUOTASTR)
#   2  error     (warnings, when present, are listed before the errors)
#   3  metrics could not be collected
#   4  the component process is a zombie
# Any other code prints nothing and is still used as the exit status.
# Globals read: DataResultStr, WranResultStr, ErrorResultStr, QUOTASTR, quota
function gotErr(){
  local line=""
  local emit=1

  if [ "$1" -eq 0 ]; then
    line="mailstatedes=各项指标恢复正常 statedes=各项指标恢复正常|$DataResultStr"
  elif [ "$1" -eq 1 ]; then
    line="quota=${quota} mailstatedes=$WranResultStr statedes=$WranResultStr ${QUOTASTR} |$DataResultStr"
  elif [ "$1" -eq 2 ]; then
    # Warnings collected so far go in front of the error text.
    local detail="${ErrorResultStr}"
    if [[ -n "${WranResultStr}" ]]; then
      detail="${WranResultStr},${ErrorResultStr}"
    fi
    line="quota=${quota} mailstatedes=${detail} statedes=${detail} ${QUOTASTR}|$DataResultStr"
  elif [ "$1" -eq 3 ]; then
    line="mailstatedes=无法获取到指标,疑似组件故障,请确认并请检查监控脚本和运维平台配置 statedes=无法获取到指标,疑似组件故障,请确认并请检查监控脚本和运维平台配置"
  elif [ "$1" -eq 4 ]; then
    line="mailstatedes=该组件进程为僵尸进程,请确认并请检查该组件状态 statedes=该组件进程为僵尸进程,请确认并请检查该组件状态|$DataResultStr"
  else
    emit=0
  fi

  if [[ "$emit" -eq 1 ]]; then
    printf '%s\n' "$line"
  fi
  exit $1
}
# Check whether the first line of a string matches a pattern.
#   $1  target string (only its first line is examined)
#   $2  pattern handed to grep
# Stores the matching line in the global `result`.
# Returns 0 when a non-empty match was found, 1 otherwise.
function contain(){
  result=$(echo "$1" | head -n 1 | grep "$2")
  [[ -n "$result" ]]
}
# Take one mongostat sample and keep its last output line.
# An empty result is taken to mean the service may be down (that check is
# currently disabled, see below).
#
# Globals read:    BASEDIR, MONGOHOST, PORT, USER, PASSWORD
# Globals written: mongoStatStr (last line printed by mongostat, may be empty)
# Returns:         always 0
#
# Fixes over the previous version: the authenticated call passed the password
# to --port and the port to -p, and used the non-standard "-host" spelling.
# NOTE(review): the password is still passed on the command line and is
# therefore visible in the process list.
function getMongoDBStat(){
  local -a stat_cmd=("${BASEDIR}/mongostat" --host "$MONGOHOST" --port "$PORT")

  # Credentials are only added when a user name was supplied.
  if [ "$USER" != "" ]; then
    stat_cmd+=(-u "$USER" -p "$PASSWORD")
  fi

  # One row, no header line; tail keeps only the data row.
  stat_cmd+=(-n 1 --noheaders)
  mongoStatStr=$("${stat_cmd[@]}" | tail -n 1)

  # Disabled checks kept for reference:
  #if [[ "$mongoStatStr" == "" ]];then
  #gotErr 3
  #fi
  #ZOMBIE=$(ps -A -ostat,pid | grep -e '^[Zz]' | grep $FLAGEID)
  #if [[ "$ZOMBIE" != "" ]];then
  #gotErr 4
  #fi
  return 0
}
# Append the elapsed run time of process $FLAGEID to the JSON fragment.
# `ps -eo pid,etime` lists the elapsed time of every process.
#
# The PID column is compared exactly; a plain grep would also match PIDs or
# elapsed times that merely contain the digits of $FLAGEID and could return
# several rows.
#
# Globals read:    FLAGEID (PID to look up)
# Globals written: CURNUM, mongos_acquisition_json
function getRuntime(){
  CURNUM=$(ps -eo pid,etime | awk -v pid="$FLAGEID" '$1 == pid {print $2}')
  mongos_acquisition_json="$mongos_acquisition_json,'runtime':'$CURNUM'"
}
# Append the start time of process $FLAGEID to the JSON fragment.
# `ps -eo pid,lstart` lists the start time of every process.
#
# The PID column is compared exactly; a plain grep would also match any row
# whose date, time or year contains the digits of $FLAGEID (for example a PID
# of 20 matches the year 2024 on every row).
#
# Globals read:    FLAGEID (PID to look up)
# Globals written: CURNUM (fields 2-6 of the matching row, space separated),
#                  mongos_acquisition_json
function getStarttime(){
  CURNUM=$(ps -eo pid,lstart | awk -v pid="$FLAGEID" '$1 == pid {print $2, $3, $4, $5, $6}')
  mongos_acquisition_json="$mongos_acquisition_json,'starttime':'$CURNUM'"
}
# Extract the monitored values from the mongostat sample and print them as
# one JSON object on stdout.
#
# Columns taken from $mongoStatStr (whitespace separated):
#   $9  -> faultsnum   (digits only, emitted as a JSON string)
#   $8  -> memory(m)   (a value carrying a g/G suffix is converted to MB)
#   $12 -> connnum
# NOTE(review): these positions presumably match the mongos column layout of
# the mongostat version in use - confirm before pointing this at a mongod.
# NOTE(review): when a column is missing, memory(m)/connnum are emitted with
# no value and the output is not valid JSON; left as is because consumers may
# rely on it to detect a failed sample.
#
# Fixes over the previous version:
#   * a resident size reported in gigabytes was published under memory(m)
#     without being multiplied by 1024;
#   * the sample was expanded unquoted, so tokens such as "*0" could be
#     glob-expanded against the working directory and shift the columns.
# Threshold evaluation through parseFileds stays disabled, as before.
#
# Globals read:    mongoStatStr
# Globals written: CURNUM, mongos_acquisition_json, result
# Calls:           getRuntime, getStarttime
function analysisStat(){
  local res_field

  # Page faults.
  CURNUM=$(printf '%s\n' "$mongoStatStr" | awk '{print $9}' | sed 's/[^0-9]//g')
  mongos_acquisition_json="$mongos_acquisition_json,'faultsnum':'$CURNUM'"

  # Resident memory, normalised to megabytes.
  res_field=$(printf '%s\n' "$mongoStatStr" | awk '{print $8}')
  CURNUM=$(printf '%s\n' "$res_field" | sed 's/[^0-9.]//g')
  if [[ "$res_field" == *[gG]* ]]; then
    CURNUM=$(awk -v gib="$CURNUM" 'BEGIN{printf "%.2f\n", gib * 1024}')
  fi
  mongos_acquisition_json="$mongos_acquisition_json,'memory(m)':$CURNUM"

  # Open connections.
  CURNUM=$(printf '%s\n' "$mongoStatStr" | awk '{print $12}' | sed 's/[^0-9.]//g')
  mongos_acquisition_json="$mongos_acquisition_json,'connnum':$CURNUM"

  getRuntime
  getStarttime

  # Drop the leading comma of the fragment, wrap it, and switch to double quotes.
  result="{'cpname':'mongos',${mongos_acquisition_json#*,}}"
  result=${result//\'/\"}
  # Keep the object on one line even if a value happens to span several lines.
  result=${result//$'\n'/ }
  printf '%s\n' "$result"
}
# Compare the current value against its warning and error limits and extend
# the result strings accordingly.
#   CURNUM  current value (global)
#   $1      metric name
#   $2      warning limit
#   $3      error limit
# Returns 0 when CURNUM <= $2, 1 when $2 < CURNUM <= $3 (warning), 2 otherwise.
# Globals written: val1, val2, DataResultStr, and on an alert WranResultStr or
#                  ErrorResultStr, quota and QUOTASTR.
function parseFileds(){
  local exceeded status message

  # awk does the comparison so that decimal values work.
  val1=$(awk '{if($1<=$2){print 1}else{print 0}}' <<<"$CURNUM $2")
  val2=$(awk '{if($1<=$2){print 1}else{print 0}}' <<<"$CURNUM $3")
  DataResultStr="$DataResultStr $1=$CURNUM;$2;$3;0;0"

  # Within the warning limit: nothing to report.
  if [ $val1 -eq 1 ];then
    return 0
  fi

  if [[ $val1 -eq 0 && $val2 -eq 1 ]];then
    exceeded="超过告警值${2}"
    status=1
  else
    exceeded="超过紧急值${3}"
    status=2
  fi
  message="$1当前值为${CURNUM}${exceeded}"

  # Messages of the same severity are joined with commas.
  if [[ $status -eq 1 ]]; then
    WranResultStr="${WranResultStr:+${WranResultStr},}${message}"
  else
    ErrorResultStr="${ErrorResultStr:+${ErrorResultStr},}${message}"
  fi

  quota="${quota:+${quota},}$1"
  QUOTASTR="${QUOTASTR}$1=${message} "
  return $status
}
# Turn the collected result strings into a final state and report it via gotErr:
#   any error text     -> gotErr 2
#   only warning text  -> gotErr 1
#   neither            -> gotErr 0
function analysisResult(){
  if [[ -n "$ErrorResultStr" ]]; then
    gotErr 2
  elif [[ -n "$WranResultStr" ]]; then
    gotErr 1
  else
    gotErr 0
  fi
}
# Print the usage text on stdout and terminate the script with status 1.
#   $1  optional: name of what the user still has to supply; when non-empty
#       it is printed first as a "please enter ..." line.
#
# The -H option (host) is accepted by the option parser but was missing from
# the help; it is now listed in the synopsis and described.
# NOTE(review): the synopsis still advertises -w/-c, which the getopts string
# in this file does not accept - confirm whether they should be restored or
# removed from the text.
function showHelp(){
  if [[ -n "$1" ]]; then
    echo "请输入$1"
  fi
  echo "check_mongodb.sh 可以监听本地的mongodb的状态 参数如下"
  echo "check_mongodb.sh [-w 每秒失败,内存(M),锁时间%,连接,等待读取,等待写] [-c 每秒失败,内存(M),锁时间%,连接,等待读取,等待写] [-u <str>] [-p <str>] [-b <str>] [-P <str>] [-H <str>]"
  echo "-u 表示用户名 可以不写"
  echo "-p 表示密码 可以不写"
  echo "-P 表示端口号"
  echo "-H 表示主机地址 默认为127.0.0.1"
  echo "-b 表示基础路径 默认为/usr/local/mongodb/bin/"
  exit 1
}
# Check whether $1 is an unsigned integer or decimal number.
#
# NOTE: the return convention is inverted with respect to usual shell
# practice and is kept because callers test for it:
#   returns 1 when $1 IS a number, 0 when it is not.
# The same value is stored in the global `isNum`.
#
# Accepts digits, optionally followed by a dot and any number of further
# digits ("12", "12.", "12.5", "12.25"). The previous pattern allowed at most
# one digit after the dot, so values such as "1.25" were rejected.
function isNumber(){
  local number_re='^[0-9]+[.]?[0-9]*$'
  if [[ "$1" =~ $number_re ]]; then
    isNum=1
  else
    isNum=0
  fi
  return $isNum
}
# Test whether $1 >= $2, both being numbers.
#   $1, $2  values to compare (integers or decimals)
# Returns 0 when both arguments are numbers and $1 >= $2, 1 otherwise.
#
# isNumber returns 1 for a number and 0 otherwise, hence the "-eq 0" checks.
# The arguments are passed on quoted: unquoted, a value such as "20 1" was
# split, validated as "20" and then shifted the fields of the comparison.
# The comparison result is no longer stored in a global that shares the
# function's name.
function isLarge(){
  isNumber "$1"
  if [[ $? -eq 0 ]]; then
    return 1
  fi
  isNumber "$2"
  if [[ $? -eq 0 ]]; then
    return 1
  fi

  # awk compares numerically, so decimals are handled.
  if awk -v a="$1" -v b="$2" 'BEGIN{exit !(a + 0 >= b + 0)}'; then
    return 0
  fi
  return 1
}
# Validate one warning/error threshold pair and, when valid, switch the
# corresponding metric on.
#   $1  name of the flag variable to set to 1 (e.g. FAULTS)
#   $2  warning threshold
#   $3  error threshold
# Returns 1 when both thresholds are given but isLarge rejects them (not
# numbers, or error < warning) or when $1 is not a valid variable name;
# 0 otherwise. When either threshold is empty nothing is checked or set.
#
# The flag used to be "assigned" with `$1=1`, which the shell runs as a
# command named e.g. "FAULTS=1" (command not found) and never sets anything;
# printf -v performs the indirect assignment.
function checkARG(){
  local name_re='^[A-Za-z_][A-Za-z0-9_]*$'
  if [[ "$2" != "" && "$3" != "" ]]; then
    isLarge "$3" "$2"
    if [[ $? -eq 1 ]]; then
      return 1
    fi
    if [[ ! "$1" =~ $name_re ]]; then
      return 1
    fi
    printf -v "$1" '%s' 1
  fi
  return 0
}
# Validate the threshold pairs of every metric (FAULTS, RES, CONN).
# A value is valid when it is an integer or a decimal; for the daemon
# selection this value does not have to be verified.
#
# For each metric, in that order:
#   * both <METRIC>MIN and <METRIC>MAX empty -> metric is skipped
#   * exactly one of them empty               -> return 1
#   * isLarge MAX MIN returns 1               -> return 1
#   * otherwise                               -> <METRIC> is set to 1
# Returns 0 when no metric failed. Flags set before a failure stay set.
function checkALL(){
  local metric low_name high_name low high

  for metric in FAULTS RES CONN; do
    low_name="${metric}MIN"
    high_name="${metric}MAX"
    low=${!low_name}
    high=${!high_name}

    # No thresholds at all: leave this metric untouched.
    if [[ -z "$low" && -z "$high" ]]; then
      continue
    fi
    # A pair must be complete.
    if [[ -z "$high" || -z "$low" ]]; then
      return 1
    fi

    isLarge "$high" "$low"
    if [[ $? -eq 1 ]]; then
      return 1
    fi
    printf -v "$metric" '%s' 1
  done
  return 0
}
# Split a comma-separated threshold list into the per-metric threshold globals.
#   $1  1 for the warning list (fills FAULTSMIN, RESMIN, CONNMIN);
#       any other value for the error list (fills FAULTSMAX, RESMAX, CONNMAX)
#   $2  "faults,memory,connections"; further fields are ignored
#
# Every field is reduced to digits and the decimal point. The previous
# version also removed the dot, turning a threshold of 1.5 into 15.
# NOTE(review): the help text lists six fields with connections in fourth
# place, while this function reads connections from the third - confirm the
# intended order.
function parseARG(){
  local faults res conn _rest
  IFS=',' read -r faults res conn _rest <<<"$2"
  faults=${faults//[^0-9.]/}
  res=${res//[^0-9.]/}
  conn=${conn//[^0-9.]/}

  if [ "$1" -eq 1 ] 2>/dev/null; then
    # warning thresholds
    FAULTSMIN=$faults
    RESMIN=$res
    CONNMIN=$conn
  else
    # error thresholds
    FAULTSMAX=$faults
    RESMAX=$res
    CONNMAX=$conn
  fi
}
# Command-line parsing. Options taking a value: -u user, -p password,
# -b directory that holds the mongostat binary, -P port, -H host.
# -h prints the help text.
while getopts "u:p:b:P:H:h" arg
do
case $arg in
h)
showHelp
;;
u)
USER=$OPTARG
;;
p)
# password
PASSWORD=$OPTARG
;;
P)
PORT=$OPTARG
;;
H)
MONGOHOST=$OPTARG
;;
b)
BASEDIR=$OPTARG
;;
?)
# The bare ? pattern matches any remaining single character, which includes
# the "?" getopts reports for an unknown option; showHelp exits the script.
showHelp
;;
esac
done
# Threshold validation is disabled.
#checkALL
# NOTE(review): with checkALL commented out, $? below is the exit status of
# the while loop above, not the result of a validation step.
if [[ $? == 1 ]];then
showHelp "正确参数"
fi
# NOTE(review): LOCKTIME, QR and QW are not assigned anywhere in the visible
# source. Inside [[ ]] an empty operand of -eq evaluates to 0, so unless those
# variables come from the environment this condition is false and the help is
# not shown, even though FAULTS, RES and CONN all keep their default of -1.
# Initialising the missing variables to -1 would make the script always stop
# here - confirm the intent before changing it.
if [[ "$FAULTS" -eq -1 && "$RES" -eq -1 && "$LOCKTIME" -eq -1 && "$CONN" -eq -1 && "$QR" -eq -1 && "$QW" -eq -1 ]];then
showHelp "要监控项"
fi
# Main part of the plugin:
# confirm the service is alive
# start
# analyse
getMongoDBStat
analysisStat
# Evaluation of the collected warnings/errors is disabled.
#analysisResult