
#! /bin/bash
# NOTE(review): a blank line precedes this interpreter line, so it is not on
# line 1 of the file and will not act as a shebang when the script is executed
# directly - confirm how the script is launched.
export LANG=en_US.UTF-8
#WARNLIMIT=-1 # warning threshold: a value above it should raise a warning
#ERRORLIMIT=-1 # error threshold: a value above it counts as an error
CURNUM= # value of the metric currently being collected
WranResultStr= # accumulated warning string
ErrorResultStr= # accumulated error string
DataResultStr= # accumulated 'name':'value' metric pairs
# Connection defaults; overridden by the -b / -H / -u / -p / -P / -t options.
BASEDIR="/usr/local/mongodb/bin/"
HASBASEDIR=1
MONGOHOST=127.0.0.1
USER=
PASSWORD=
PORT=27017
THRESHOLD=100
# Per-metric value / min / max placeholders, all initialised to -1.
FAULTS=-1
FAULTSMIN=-1
FAULTSMAX=-1
RES=-1
RESMIN=-1
RESMAX=-1
LOCKTIME=-1
LOCKTIMEMIN=-1
LOCKTIMEMAX=-1
CONN=-1
CONNMIN=-1
CONNMAX=-1
QR=-1
QRMIN=-1
QRMAX=-1
QW=-1
QWMIN=-1
QWMAX=-1
SLOW_QUERY=-1
SLOW_QUERY_MIN=-1
SLOW_QUERY_MAX=-1
COPY_SET=-1
COPY_SET_MIN=-1
COPY_SET_MAX=-1
# Empty placeholders named after the collected metrics.
replset=
repl_role=
ok=
uptime=
version=
connections_current=
connections_available=
mem_bits=
mem_resident=
mem_virtual=
mem_supported=
mem_mapped=
mem_mappedWithJournal=
network_bytesIn_persecond=
network_bytesOut_persecond=
network_numRequests_persecond=
opcounters_insert_persecond=
opcounters_query_persecond=
opcounters_update_persecond=
opcounters_delete_persecond=
# PID of the mongod process listening on PORT (filled in by getMongoDBStat).
FLAGEPID=
# whether to output FLAGPID
FLAGEOPID=1
# whether to output RUNTIME
RUNTIME=1
# whether to output STARTTIME
STARTTIME=0
# Divisor applied to the summed CPU percentage (recomputed by getMongoDBStat).
CORENUM=1
################################# samples from the previous run #############################################
# last_*_time holds the epoch second of the previous sample and
# last_*_persecond the raw counter value stored with it; both are loaded from
# ${PORT}.txt by initLast.
last_network_bytesIn_time=
last_network_bytesOut_time=
last_network_numRequests_time=
last_opcounters_insert_time=
last_opcounters_query_time=
last_opcounters_update_time=
last_opcounters_delete_time=
last_network_bytesIn_persecond=
last_network_bytesOut_persecond=
last_network_numRequests_persecond=
last_opcounters_insert_persecond=
last_opcounters_query_persecond=
last_opcounters_update_persecond=
last_opcounters_delete_persecond=
# Loads the counter samples written by the previous run and resets the state
# file.
# Globals read:    PORT (the state file is "${PORT}.txt" in the current directory)
# Globals written: last_<metric>_time      - third column of the saved line
#                  last_<metric>_persecond - second column of the saved line
# Side effects:    creates the state file if missing, then truncates it to "##"
#                  so the *_persecond collectors can append fresh samples.
function initLast(){
  local state_file="${PORT}.txt"
  local metric

  if [[ ! -f "$state_file" ]]; then
    touch "$state_file"
  fi

  # Each saved line looks like "<metric> <raw counter> <epoch seconds>".
  for metric in network_bytesIn network_bytesOut network_numRequests \
                opcounters_insert opcounters_query opcounters_update \
                opcounters_delete; do
    printf -v "last_${metric}_time" '%s' \
      "$(awk -v key="$metric" 'index($0, key) {print $3}' "$state_file")"
    printf -v "last_${metric}_persecond" '%s' \
      "$(awk -v key="$metric" 'index($0, key) {print $2}' "$state_file")"
  done

  echo "##" > "$state_file"
}
# Checks that a mongod process is listening on PORT.
# Counts the `netstat -lntp` lines that contain both the port and the word
# "mongod"; when there are none, hands over to `gotErr 3`.
# Globals read:    PORT
# Globals written: result (the match count)
function isAlive(){
  result=$(netstat -lntp | grep -w ${PORT} | grep -cw mongod)
  [[ "$result" -ne 0 ]] || gotErr 3
}
# Prints the final plugin output for the given status code and exits.
# Arguments: $1 - status code
#   2 -> print the collected metrics as {"cpname":"mongodb",...} and exit 2
#   3 -> print the "metrics unavailable" message and exit 3
#   4 -> print the "zombie process" message and exit 3
#   any other value -> exit with that value without printing anything
# Globals read: DataResultStr ('name':'value' pairs, single-quoted)
function gotErr(){
  local payload
  local -a words

  if [ "$1" -eq 2 ];then
    payload="{'cpname':'mongodb',$DataResultStr}"
    payload=${payload//\'/\"}
    # Collapse whitespace runs (including newlines) into single spaces so the
    # output stays on one line, but split with `read` rather than an unquoted
    # echo so that words such as "*" are not pathname-expanded.
    read -r -d '' -a words <<< "$payload"
    printf '%s\n' "${words[*]}"
  elif [ "$1" -eq 3 ];then
    echo "无法获取到指标,疑似组件故障,请确认并请检查监控脚本和运维平台配置"
  elif [ "$1" -eq 4 ];then
    echo "该组件进程为僵尸进程,请确认并请检查该组件状态"
    exit 3
  fi
  exit $1
}
# Tests whether the first line of a string matches a grep pattern.
# Arguments: $1 - target string (only its first line is examined)
#            $2 - pattern handed to grep
# Globals written: result (the matching line, empty when there is no match)
# Returns: 0 when the captured match is non-empty, 1 otherwise
function contain(){
  result=$(echo "$1" | head -n 1 | grep "$2")
  [[ -n "$result" ]]
}
# Stores the elapsed running time of process FLAGEPID in CURNUM.
# Uses `ps -eo pid,etime` and keeps the etime column of the row whose PID
# equals FLAGEPID. The PID is passed to awk as a variable instead of being
# spliced into the program text, so an empty FLAGEPID yields an empty CURNUM
# rather than grep/awk errors.
# Globals read:    FLAGEPID
# Globals written: CURNUM
function getRuntime(){
  CURNUM=$(ps -eo pid,etime | awk -v pid="$FLAGEPID" 'pid != "" && $1 == pid {print $2}')
}
# Stores the start time of process FLAGEPID in CURNUM.
# Uses `ps -eo pid,lstart` and joins the five lstart fields (columns 2-6) of
# the row whose PID equals FLAGEPID with single spaces. The PID is passed to
# awk as a variable, so an empty FLAGEPID yields an empty CURNUM instead of
# grep/awk errors.
# Globals read:    FLAGEPID
# Globals written: CURNUM
function getStarttime(){
  CURNUM=$(ps -eo pid,lstart | awk -v pid="$FLAGEPID" 'pid != "" && $1 == pid {print $2, $3, $4, $5, $6}')
}
# Stores the %MEM value (column 4 of `ps aux`) of process FLAGEPID in CURNUM.
# Rows are selected by comparing the PID column exactly; a plain
# `grep $FLAGEPID` would also match any row that merely contains those digits
# (another PID, a size column, a command line) and add its memory to the sum.
# Prints 0 when no row matches.
# Globals read:    FLAGEPID
# Globals written: CURNUM
function getMemory(){
  CURNUM=$(ps aux | awk -v pid="$FLAGEPID" 'pid != "" && $2 == pid {sum += $4} END {print sum + 0}')
}
# Stores the %CPU value (column 3 of `ps aux`) of process FLAGEPID in CURNUM,
# divided by CORENUM when CORENUM is non-zero.
# Rows are selected by comparing the PID column exactly; a plain
# `grep $FLAGEPID` would also match unrelated rows that merely contain those
# digits and add their CPU usage to the sum.
# Globals read:    FLAGEPID, CORENUM
# Globals written: CURNUM
function getCPU(){
  CURNUM=$(ps aux | awk -v pid="$FLAGEPID" 'pid != "" && $2 == pid {sum += $3} END {print sum + 0}')
  if [ "$CORENUM" -ne 0 ];then
    # bc truncates to two decimals; printf pads the result to the form 0.00
    CURNUM=$(printf "%.2f" "$(echo "scale=2;$CURNUM/$CORENUM" | bc)")
  fi
}
# Detects a change in the second-to-last mongostat column between two runs.
# The value from this run is saved to "${PORT}state.txt"; CURNUM becomes 0 when
# it equals the previously saved value (or when nothing was saved yet) and 2
# when it differs.
# NOTE(review): the second-to-last column is presumably the replica-set state
# (PRI/SEC/...) - confirm against the mongostat version in use.
# Globals read:    PORT, mongoStatStr
# Globals written: CURNUM
function getCopySet(){
  local state_file="${PORT}state.txt"
  local previous=""
  local current

  # First run: there is no saved state yet, so skip the read instead of
  # letting cat complain on stderr.
  if [[ -f "$state_file" ]]; then
    previous=$(cat "$state_file")
  fi

  # The here-string keeps fields such as "*0" from being pathname-expanded;
  # the NF guard avoids an awk error on an empty line.
  current=$(awk 'NF {print $(NF-1)}' <<< "$mongoStatStr")
  printf '%s\n' "$current" > "$state_file"

  if [[ -z "$previous" || "$previous" == "$current" ]]; then
    CURNUM=0
  else
    CURNUM=2
  fi
}
# Sets CURNUM from the numeric result of `rs.status().code`:
# 0 when the code equals 76, 1 for anything else.
# Globals read:    mongoStr (mongo shell command line, expanded unquoted)
# Globals written: CURNUM
function replset(){
  local replcode
  replcode=$($mongoStr --eval="rs.status().code")
  CURNUM=1
  if [[ "${replcode}" -eq 76 ]]; then
    CURNUM=0
  fi
}
# Sets CURNUM to 1 when `db.isMaster().ismaster` prints exactly "true",
# otherwise to 0.
# Globals read:    mongoStr (mongo shell command line, expanded unquoted)
# Globals written: CURNUM
function repl_role(){
  local ismaster
  ismaster=$($mongoStr --eval="db.isMaster().ismaster")
  case "${ismaster}" in
    true) CURNUM=1 ;;
    *)    CURNUM=0 ;;
  esac
}
# Stores the output of `db.serverStatus().ok` in CURNUM.
# Globals read: mongoStr (expanded unquoted). Globals written: CURNUM.
function ok(){
  CURNUM=$($mongoStr --eval="db.serverStatus().ok")
}
# Stores the output of `db.serverStatus().uptime` in CURNUM.
# Globals read: mongoStr (expanded unquoted). Globals written: CURNUM.
# Note: inside this script the function shadows the system `uptime` command.
function uptime(){
  CURNUM=$($mongoStr --eval="db.serverStatus().uptime")
}
# Stores the output of `db.serverStatus().version` in CURNUM.
# Globals read: mongoStr (expanded unquoted). Globals written: CURNUM.
function version(){
  CURNUM=$($mongoStr --eval="db.serverStatus().version")
}
# Stores the output of `db.serverStatus().connections.current` in CURNUM.
# Globals read: mongoStr (expanded unquoted). Globals written: CURNUM.
function connections_current(){
  CURNUM=$($mongoStr --eval="db.serverStatus().connections.current")
}
# Stores the output of `db.serverStatus().connections.available` in CURNUM.
# Globals read: mongoStr (expanded unquoted). Globals written: CURNUM.
function connections_available(){
  CURNUM=$($mongoStr --eval="db.serverStatus().connections.available")
}
# Stores the output of `db.serverStatus().mem.bits` in CURNUM.
# Globals read: mongoStr (expanded unquoted). Globals written: CURNUM.
function mem_bits(){
  CURNUM=$($mongoStr --eval="db.serverStatus().mem.bits")
}
# Stores the output of `db.serverStatus().mem.resident` in CURNUM.
# Globals read: mongoStr (expanded unquoted). Globals written: CURNUM.
function mem_resident(){
  CURNUM=$($mongoStr --eval="db.serverStatus().mem.resident")
}
# Stores the output of `db.serverStatus().mem.virtual` in CURNUM.
# Globals read: mongoStr (expanded unquoted). Globals written: CURNUM.
function mem_virtual(){
  CURNUM=$($mongoStr --eval="db.serverStatus().mem.virtual")
}
# Sets CURNUM to 0 when `db.serverStatus().mem.supported` prints exactly
# "true", otherwise to 1.
# The comparison is quoted so that an empty or multi-word reply from the shell
# falls into the "else" branch silently instead of producing a `[` syntax
# error.
# Globals read:    mongoStr (mongo shell command line, expanded unquoted)
# Globals written: CURNUM
function mem_supported(){
  local memSupported
  memSupported=$($mongoStr --eval="db.serverStatus().mem.supported")
  if [[ "$memSupported" == "true" ]]; then
    CURNUM=0
  else
    CURNUM=1
  fi
}
# Collects the index miss ratio.
# CURNUM is 0 when `db.serverStatus().indexCounters` prints nothing; otherwise
# it is the output of `db.serverStatus().indexCounters.missRatio`.
# (An alternative path, indexCounters.btree.missRatio, was left disabled in the
# original code.)
# Globals read:    mongoStr (mongo shell command line, expanded unquoted)
# Globals written: CURNUM, missValue
function miss(){
  missValue=$($mongoStr --eval="db.serverStatus().indexCounters")
  CURNUM=0
  [[ -n "$missValue" ]] || return 0
  CURNUM=$($mongoStr --eval="db.serverStatus().indexCounters.missRatio")
}
# Stores the output of `db.serverStatus().mem.mapped` in CURNUM.
# Globals read: mongoStr (expanded unquoted). Globals written: CURNUM.
function mem_mapped(){
  CURNUM=$($mongoStr --eval="db.serverStatus().mem.mapped")
}
# Stores the output of `db.serverStatus().mem.mappedWithJournal` in CURNUM.
# Globals read: mongoStr (expanded unquoted). Globals written: CURNUM.
function mem_mappedWithJournal(){
  CURNUM=$($mongoStr --eval="db.serverStatus().mem.mappedWithJournal")
}
# Shared implementation of the seven *_persecond collectors below.
# Reads a monotonically increasing counter from the mongo shell, appends the
# sample to the state file and turns the difference to the previous sample
# into a per-second rate.
# Arguments: $1 - key written to the state file (e.g. network_bytesIn)
#            $2 - JavaScript expression evaluated by the mongo shell
#            $3 - counter value of the previous sample (may be empty)
#            $4 - epoch second of the previous sample (may be empty)
# Globals read:    mongoStr (expanded unquoted), PORT
# Globals written: CURNUM
#   empty  - no usable reply, no previous sample, or the counter went
#            backwards (e.g. mongod was restarted), so no rate can be given
#   0      - the previous sample was taken in the same second (or later)
#   "x.yy" - (current - previous) / elapsed seconds, two decimals
# Side effects: appends "<key> <value> <epoch>" to "${PORT}.txt"
function _mongo_counter_rate(){
  local key=$1 expression=$2 prev_value=$3 prev_time=$4
  local raw now elapsed delta

  raw=$($mongoStr --eval="$expression")
  # Keep only the integer part of the reply. Some shells wrap 64-bit values,
  # e.g. NumberLong(123), which would break the arithmetic below.
  raw=${raw//[^0-9.]/}
  raw=${raw%%.*}

  CURNUM=
  [[ -n "$raw" ]] || return 0

  now=$(date +%s)
  echo "${key} ${raw} ${now}" >> "${PORT}.txt"

  # First sample (or an unreadable previous one): nothing to compare against.
  [[ "$prev_time" =~ ^[0-9]+$ && "$prev_value" =~ ^[0-9]+$ ]] || return 0

  # 10# forces base 10 so a value with leading zeros is not read as octal.
  elapsed=$(( 10#$now - 10#$prev_time ))
  delta=$(( 10#$raw - 10#$prev_value ))

  if (( delta < 0 )); then
    return 0
  fi
  if (( elapsed <= 0 )); then
    CURNUM=0
    return 0
  fi
  CURNUM=$(awk -v d="$delta" -v s="$elapsed" 'BEGIN{printf "%.2f\n", d / s}')
}

# Inbound network bytes per second since the previous run.
function network_bytesIn_persecond(){
  _mongo_counter_rate network_bytesIn "db.serverStatus().network.bytesIn" \
    "$last_network_bytesIn_persecond" "$last_network_bytesIn_time"
}

# Outbound network bytes per second since the previous run.
function network_bytesOut_persecond(){
  _mongo_counter_rate network_bytesOut "db.serverStatus().network.bytesOut" \
    "$last_network_bytesOut_persecond" "$last_network_bytesOut_time"
}

# Network requests per second since the previous run.
function network_numRequests_persecond(){
  _mongo_counter_rate network_numRequests "db.serverStatus().network.numRequests" \
    "$last_network_numRequests_persecond" "$last_network_numRequests_time"
}

# Insert operations per second since the previous run.
function opcounters_insert_persecond(){
  _mongo_counter_rate opcounters_insert "db.serverStatus().opcounters.insert" \
    "$last_opcounters_insert_persecond" "$last_opcounters_insert_time"
}

# Query operations per second since the previous run.
function opcounters_query_persecond(){
  _mongo_counter_rate opcounters_query "db.serverStatus().opcounters.query" \
    "$last_opcounters_query_persecond" "$last_opcounters_query_time"
}

# Update operations per second since the previous run.
function opcounters_update_persecond(){
  _mongo_counter_rate opcounters_update "db.serverStatus().opcounters.update" \
    "$last_opcounters_update_persecond" "$last_opcounters_update_time"
}

# Delete operations per second since the previous run.
function opcounters_delete_persecond(){
  _mongo_counter_rate opcounters_delete "db.serverStatus().opcounters.delete" \
    "$last_opcounters_delete_persecond" "$last_opcounters_delete_time"
}
# Collects the raw inputs every later metric is derived from.
# Globals read:    BASEDIR, PORT, MONGOHOST, USER, PASSWORD
# Globals written:
#   CORENUM      - number of "physical id" lines in /proc/cpuinfo (0 if none)
#   FLAGEPID     - PID of the mongod listener on PORT, taken from netstat
#   VERSION      - 2 when the mongostat major version is below 3, otherwise 3
#                  (also 3 when the version line cannot be parsed)
#   user/password- copies of USER / PASSWORD
#   mongoStatStr - last line of a single mongostat sample
#   mongoStr     - mongo shell command line; callers expand it unquoted, so
#                  credentials containing whitespace are not supported
#   HASBASEDIR   - set to 0 when mongostat produced no output
# Returns: always 0; an empty mongoStatStr is how a failure is reported.
function getMongoDBStat(){
  # Accept BASEDIR with or without a trailing slash; an empty BASEDIR falls
  # back to a PATH lookup of the tools.
  local bin_prefix=${BASEDIR:+${BASEDIR%/}/}
  local version_line

  CORENUM=$(grep -c "physical id" /proc/cpuinfo 2>/dev/null)
  CORENUM=${CORENUM:-0}

  FLAGEPID=$(netstat -lnp | grep -w "$PORT" | grep /mongod \
    | awk '{print $NF}' | awk -F '/' '{print $1}' | head -n 1)

  # Decide on the numeric major version rather than on the mere presence of
  # the characters "2" or "3" somewhere in the version line.
  version_line=$("${bin_prefix}mongostat" --version | head -n 1)
  if [[ "$version_line" =~ ([0-9]+)\.[0-9]+ ]] && (( 10#${BASH_REMATCH[1]} < 3 )); then
    VERSION=2
  else
    VERSION=3
  fi

  # user=`echo $USER | openssl aes-128-cbc -k cycore -base64`
  # password=`echo $PASSWORD | openssl aes-128-cbc -k cycore -base64`
  password=$PASSWORD
  user=$USER

  if [[ -n "$USER" ]]; then
    mongoStatStr=$("${bin_prefix}mongostat" -u "$USER" -p "$PASSWORD" \
      --host "$MONGOHOST" --port "$PORT" -n 1 --noheaders \
      --authenticationDatabase=admin | tail -n 1)
    mongoStr="${bin_prefix}mongo $MONGOHOST:$PORT/admin -u $user -p $password --quiet"
  else
    mongoStatStr=$("${bin_prefix}mongostat" --host "$MONGOHOST" --port "$PORT" \
      -n 1 --noheaders 2>/dev/null | tail -n 1)
    mongoStr="${bin_prefix}mongo --host $MONGOHOST --port $PORT --quiet"
  fi

  if [[ -z "$mongoStatStr" ]]; then
    HASBASEDIR=0
  fi
  return 0
}
# Collects every metric and appends it to DataResultStr.
# Each step either fills CURNUM directly from a column of mongoStatStr or calls
# one of the collector functions (which set CURNUM), then calls parseFileds /
# parseTimeFileds with the name under which the value is reported. The order of
# the statements is therefore the order of the keys in the final output.
# Globals read:    mongoStatStr, VERSION, BASEDIR, PORT, MONGOHOST,
#                  SLOW_QUERY_MIN, SLOW_QUERY_MAX, THRESHOLD, FLAGEPID
# Globals written: CURNUM, DataResultStr (via the parse* helpers), the *_INDEX
#                  variables, column_size, slow_query_script, slow_result,
#                  TEMPCODE, COUNT
# Exits through `gotErr 3` when the mongostat line has an unexpected number of
# columns.
function analysisStat(){
# CURNUM=$(echo $mongoStatStr | awk -F ' ' '{print $11}' | sed "s/[^0-9\.mg]//g")
# ism=$(echo $CURNUM | grep "g")
# if [[ "$ism" != "" ]];then
# CURNUM=$(echo $CURNUM | sed "s/[^0-9\.]//g")
# CURNUM=`awk 'BEGIN{printf "%.2f\n",'$CURNUM'}'`
# else
# CURNUM=$(echo $CURNUM | sed "s/[^0-9\.]//g")
# fi
column_size=`echo $mongoStatStr|awk -F ' ' '{print NF}'`
# Map the number of columns (which differs between mongostat versions) to the
# column positions of the individual fields.
# NOTE(review): only FAULTS_INDEX and LOCKTIME_INDEX are read further down in
# this function; RES_INDEX, CONN_INDEX, QRW_INDEX and MISS_INDEX are assigned
# but not used here - the ConnNum/QR/QW steps use fixed columns chosen by
# VERSION instead. Confirm which of the two is intended.
if [ "$column_size" -eq 18 ];then
FAULTS_INDEX=10
RES_INDEX=9
LOCKTIME_INDEX=11
CONN_INDEX=17
QRW_INDEX=13
MISS_INDEX=12
elif [ "$column_size" -eq 21 ];then
FAULTS_INDEX=11
RES_INDEX=10
LOCKTIME_INDEX=12
CONN_INDEX=18
QRW_INDEX=14
MISS_INDEX=13
elif [ "$column_size" -eq 20 ];then
FAULTS_INDEX=11
RES_INDEX=10
LOCKTIME_INDEX=12
CONN_INDEX=18
QRW_INDEX=14
MISS_INDEX=13
elif [ "$column_size" -eq 19 ];then
FAULTS_INDEX=11
RES_INDEX=10
LOCKTIME_INDEX=12
CONN_INDEX=18
QRW_INDEX=14
MISS_INDEX=13
elif [ "$column_size" -eq 17 ];then
FAULTS_INDEX=11
RES_INDEX=10
# An index of 0 makes awk's $index refer to the whole line ($0).
LOCKTIME_INDEX=0
CONN_INDEX=16
QRW_INDEX=12
MISS_INDEX=0
else
# Unknown layout: report on stderr and stop with the "metrics unavailable" status.
echo "列头不匹配,请扩充列头下标代码" 1>&2
gotErr 3
fi
getMemory
parseFileds "Memory"
# Connection count: column 18 for mongostat < 3, column 16 otherwise.
if [ $VERSION -lt 3 ];then
CURNUM=$(echo $mongoStatStr | awk -F ' ' '{print $18}' | sed "s/[^0-9\.]//g")
else
CURNUM=$(echo $mongoStatStr | awk -F ' ' '{print $16}' | sed "s/[^0-9\.]//g")
fi
parseFileds "ConnNum"
# QR: the part before "|" of column 14 (mongostat < 3) or column 12.
if [ $VERSION -lt 3 ];then
CURNUM=$(echo $mongoStatStr | awk -F ' ' '{print $14}'| awk -F '|' '{print $1}')
else
CURNUM=$(echo $mongoStatStr | awk -F ' ' '{print $12}'| awk -F '|' '{print $1}')
fi
parseFileds "QR"
# QW: the part after "|" of the same column.
if [ $VERSION -lt 3 ];then
CURNUM=$(echo $mongoStatStr | awk -F ' ' '{print $14}'| awk -F '|' '{print $2}')
else
CURNUM=$(echo $mongoStatStr | awk -F ' ' '{print $12}'| awk -F '|' '{print $2}')
fi
parseFileds "QW"
# Slow-query count comes from a companion script located next to this one.
# Exit status 0 or 3 is reported as 0 slow queries; for any other status the
# text in front of the "<<COUNT>>" marker in its output is used (0 if empty).
slow_query_script="$(cd `dirname $0`;pwd)/check_mongo_slow_query.sh"
slow_result=`$slow_query_script -f $BASEDIR -P $PORT -H $MONGOHOST -w $SLOW_QUERY_MIN -c $SLOW_QUERY_MAX -t ${THRESHOLD}`
TEMPCODE=$?
if (( TEMPCODE == 0 )) ; then
CURNUM=0
parseFileds "MongoDB慢查询"
elif (( TEMPCODE == 3 )) ; then
CURNUM=0
parseFileds "MongoDB慢查询"
else
COUNT=$(echo $slow_result| awk -F '<<COUNT>>' '{print $1}')
CURNUM=${COUNT}
if [ "$CURNUM" == "" ];then
CURNUM=0
fi
parseFileds "MongoDB慢查询"
fi
# Columns 1-4 of the mongostat line, stripped to digits and dots.
CURNUM=$(echo $mongoStatStr | awk -F ' ' '{print $1}' | sed "s/[^0-9\.]//g")
parseFileds "insert"
CURNUM=$(echo $mongoStatStr | awk -F ' ' '{print $2}' | sed "s/[^0-9\.]//g")
parseFileds "query"
CURNUM=$(echo $mongoStatStr | awk -F ' ' '{print $3}' | sed "s/[^0-9\.]//g")
parseFileds "update"
CURNUM=$(echo $mongoStatStr | awk -F ' ' '{print $4}' | sed "s/[^0-9\.]//g")
parseFileds "delete"
CURNUM=$(echo $mongoStatStr | awk -v faults_index="$FAULTS_INDEX" -F ' ' '{print $faults_index}' | sed "s/[^0-9]//g")
parseFileds "FAULTSNum"
# Lock time: the text after the first ":" of the selected column.
CURNUM=$(echo $mongoStatStr | awk -v locktime_index="$LOCKTIME_INDEX" -F ' ' '{print $locktime_index}' | awk -F ':' '{print $2}' | sed "s/[^0-9\.]//g")
parseFileds "LOCKTIME"
# Metrics obtained through the mongo shell.
miss
parseFileds "miss"
getCopySet
parseFileds "COPY_SET"
replset
parseFileds "replset"
repl_role
parseFileds "repl_role"
ok
parseFileds "ok"
uptime
parseFileds "uptime"
version
parseFileds "version"
connections_current
parseFileds "connections_current"
connections_available
parseFileds "connections_available"
mem_bits
parseFileds "mem_bits"
mem_resident
parseFileds "mem_resident"
mem_virtual
parseFileds "mem_virtual"
mem_supported
parseFileds "mem_supported"
mem_mapped
parseFileds "mem_mapped"
mem_mappedWithJournal
parseFileds "mem_mappedWithJournal"
# Per-second rates derived from the samples stored by the previous run.
network_bytesIn_persecond
parseFileds "network_bytesIn_persecond"
network_bytesOut_persecond
parseFileds "network_bytesOut_persecond"
network_numRequests_persecond
parseFileds "network_numRequests_persecond"
# NOTE(review): the Insert/Update/Delete keys below are capitalised while the
# query key is not - confirm the consumer expects exactly these names.
opcounters_insert_persecond
parseFileds "opcounters_Insert_persecond"
opcounters_query_persecond
parseFileds "opcounters_query_persecond"
opcounters_update_persecond
parseFileds "opcounters_Update_persecond"
opcounters_delete_persecond
parseFileds "opcounters_Delete_persecond"
# Process information: PID, elapsed running time and start time.
CURNUM=$FLAGEPID
parseTimeFileds "FLAGEPID"
getRuntime
parseTimeFileds "RUNTIME"
getStarttime
parseTimeFileds "STARTTIME"
}
# Appends the pair '<name>':'<CURNUM>' to DataResultStr, inserting a comma
# first when DataResultStr already holds earlier pairs.
# Arguments: $1 - name under which the current value is reported
# Globals read: CURNUM. Globals written: DataResultStr.
function parseTimeFileds(){
  DataResultStr="${DataResultStr:+${DataResultStr},}'$1':'$CURNUM'"
}
# Appends the pair '<name>':'<CURNUM>' to DataResultStr, inserting a comma
# first when DataResultStr already holds earlier pairs.
# (Earlier comments here mentioned WARNLIMIT / ERRORLIMIT thresholds; the
# function only concatenates and performs no threshold comparison.)
# Arguments: $1 - name under which the current value is reported
# Globals read: CURNUM. Globals written: DataResultStr.
function parseFileds(){
  DataResultStr="${DataResultStr:+${DataResultStr},}'$1':'$CURNUM'"
}
# Final step of the plugin: hands over to `gotErr 2`, the status code under
# which the collected metrics are reported.
function analysisResult(){ gotErr 2; }
# Prints the usage text and terminates the script with exit status 1.
# Arguments: $1 - optional name of a missing item; when non-empty a
#                 "please enter <item>" line is printed first.
# The usage text now also lists -H, which the option parser accepts but the
# help previously did not mention.
function showHelp(){
  if [[ -n "$1" ]]; then
    echo "请输入$1"
  fi
  printf '%s\n' \
    "check_mongodb.sh 可以监听本地的mongodb的状态 参数如下" \
    "check_mongodb.sh [-u <str>] [-p <str>] [-b <str>] [-P <str>] [-H <str>] [-t <str>]" \
    "-u 表示用户名 可以不写" \
    "-p 表示密码 可以不写" \
    "-P 表示端口号" \
    "-H 表示主机地址 默认为127.0.0.1" \
    "-b 表示基础路径 默认为/usr/local/mongodb/bin/" \
    "-t 表示设置记录MongoDB慢查询语句的时间阈值"
  exit 1
}
# Command-line parsing.
#   -u user   -p password   -P port   -H host   -b tool directory
#   -t slow-query threshold   -h help
# Any option getopts rejects also ends in showHelp.
# A follow-up check `[[ "$?" == "" ]]` used to sit after the loop; "$?" is
# never an empty string, so that branch could not run and has been dropped.
while getopts "u:p:b:P:H:t:h" arg
do
  case "$arg" in
    h)
      showHelp
      ;;
    u)
      USER=$OPTARG
      ;;
    p)
      # password
      PASSWORD=$OPTARG
      ;;
    P)
      PORT=$OPTARG
      ;;
    H)
      MONGOHOST=$OPTARG
      ;;
    b)
      BASEDIR=$OPTARG
      ;;
    t)
      THRESHOLD=$OPTARG
      ;;
    *)
      showHelp
      ;;
  esac
done
# Clears USER / PASSWORD when they still hold the literal template
# placeholders "<<username>>" / "<<password>>", i.e. when no real credential
# was substituted by the caller.
# Globals read and written: USER, PASSWORD
function parm(){
  [[ "$USER" != "<<username>>" ]] || USER=
  [[ "$PASSWORD" != "<<password>>" ]] || PASSWORD=
}
# Main body of the plugin. The steps run in this fixed order:
#   parm           - drop unsubstituted <<username>> / <<password>> placeholders
#   initLast       - load the counter samples saved by the previous run
#   isAlive        - make sure a mongod is listening on PORT
#   getMongoDBStat - take one mongostat sample and build the mongo command line
#   analysisStat   - collect every metric into DataResultStr
#   analysisResult - print the result and exit
parm
initLast
isAlive
getMongoDBStat
analysisStat
analysisResult