# check_mongodb.sh 查看mongodb
#
#   sre

#! /bin/bash
export LANG=en_US.UTF-8
#WARNLIMIT=-1      # warning threshold: values above it should raise a warning
#ERRORLIMIT=-1         # error threshold: values above it are treated as errors

CURNUM=             # value of the metric currently being collected

WranResultStr=      # accumulated warning string
ErrorResultStr=     # accumulated error string
DataResultStr=      # accumulated metric string ('name':'value' pairs)

# Connection settings. BASEDIR, MONGOHOST, USER, PASSWORD, PORT and THRESHOLD
# can be overridden by the getopts loop further down (-b, -H, -u, -p, -P, -t).
BASEDIR="/usr/local/mongodb/bin/"
# Set to 0 by getMongoDBStat when mongostat produced no output.
HASBASEDIR=1
MONGOHOST=127.0.0.1
USER=
PASSWORD=
PORT=27017
THRESHOLD=100

# Per-metric value / MIN / MAX slots, all initialised to -1.
# In the visible part of the script only SLOW_QUERY_MIN and SLOW_QUERY_MAX are
# read (they are passed to check_mongo_slow_query.sh by analysisStat).
FAULTS=-1
FAULTSMIN=-1
FAULTSMAX=-1
RES=-1
RESMIN=-1
RESMAX=-1
LOCKTIME=-1
LOCKTIMEMIN=-1
LOCKTIMEMAX=-1
CONN=-1
CONNMIN=-1
CONNMAX=-1
QR=-1
QRMIN=-1
QRMAX=-1
QW=-1
QWMIN=-1
QWMAX=-1
SLOW_QUERY=-1
SLOW_QUERY_MIN=-1
SLOW_QUERY_MAX=-1
COPY_SET=-1
COPY_SET_MIN=-1
COPY_SET_MAX=-1

# Empty placeholders that share their names with the collector functions
# defined below. Nothing in the visible script assigns or reads them; the
# functions report through CURNUM instead.
replset=
repl_role=
ok=
uptime=
version=
connections_current=
connections_available=
mem_bits=
mem_resident=
mem_virtual=
mem_supported=
mem_mapped=
mem_mappedWithJournal=
network_bytesIn_persecond=
network_bytesOut_persecond=
network_numRequests_persecond=
opcounters_insert_persecond=
opcounters_query_persecond=
opcounters_update_persecond=
opcounters_delete_persecond=

# PID of the monitored mongod process; filled in by getMongoDBStat.
FLAGEPID=
# whether to output FLAGPID
FLAGEOPID=1
# whether to output RUNTIME
RUNTIME=1
# whether to output STARTTIME
STARTTIME=0
# Divisor used by getCPU; overwritten by getMongoDBStat from /proc/cpuinfo.
CORENUM=1

################################# previous metric values #############################################
# Loaded by initLast from the per-port state file: the *_time variables take
# the third column of the matching line and the *_persecond variables take the
# second column. The *_persecond collector functions use them as the previous
# timestamp and previous counter value.
last_network_bytesIn_time=
last_network_bytesOut_time=
last_network_numRequests_time=
last_opcounters_insert_time=
last_opcounters_query_time=
last_opcounters_update_time=
last_opcounters_delete_time=
last_network_bytesIn_persecond=
last_network_bytesOut_persecond=
last_network_numRequests_persecond=
last_opcounters_insert_persecond=
last_opcounters_query_persecond=
last_opcounters_update_persecond=
last_opcounters_delete_persecond=

#######################################
# Load the samples written by the previous run, then reset the state file.
# The state file is "${PORT}.txt" in the current directory; each sample line
# has the form "<metric> <counter> <epoch-seconds>".
# Globals:
#   PORT (read)
#   last_<metric>_time, last_<metric>_persecond (written) for the seven
#     network_* / opcounters_* metrics; empty when no sample exists
# Outputs:
#   Creates the state file if missing and truncates it to the single line "##".
#######################################
initLast() {
  local state_file="${PORT}.txt"
  local metric

  if [[ ! -f "$state_file" ]]; then
    touch "$state_file"
  fi

  for metric in network_bytesIn network_bytesOut network_numRequests \
    opcounters_insert opcounters_query opcounters_update opcounters_delete; do
    # Column 3 is the sample time, column 2 the raw counter value.
    printf -v "last_${metric}_time" '%s' \
      "$(awk -v key="$metric" '$1 == key { print $3 }' "$state_file")"
    printf -v "last_${metric}_persecond" '%s' \
      "$(awk -v key="$metric" '$1 == key { print $2 }' "$state_file")"
  done

  # Start a fresh file; the *_persecond collectors append to it.
  echo "##" > "$state_file"
}

#######################################
# Check that a mongod process is listening on PORT.
# Globals:
#   PORT (read)
# Calls gotErr 3 when no listening TCP socket line from netstat matches both
# the port and "mongod".
#######################################
isAlive() {
  local listeners
  listeners=$(netstat -lntp | grep -w -- "${PORT}" | grep -cw mongod)
  if (( ${listeners:-0} == 0 )); then
    gotErr 3
  fi
}

#######################################
# Print the final result or an error message, then terminate the script.
# Arguments:
#   $1 - 2: print the collected metrics as a JSON object
#        3: print the "metrics unavailable" message
#        4: print the "zombie process" message
# Globals:
#   DataResultStr (read): comma-separated 'name':'value' pairs
# Exit status:
#   3 for code 4; 1 in every other case, including the normal code-2 output.
#   NOTE(review): exiting 1 after a successful report looks odd but is kept,
#   since the caller's expectations are not visible here.
#######################################
gotErr() {
  local code=$1
  local payload
  local sq="'"
  local dq='"'

  case "$code" in
    2)
      payload="{'cpname':'mongodb',${DataResultStr}}"
      # Convert the single-quoted pairs to JSON double quotes. printf with a
      # quoted argument keeps whitespace and glob characters in values intact.
      printf '%s\n' "${payload//$sq/$dq}"
      ;;
    3)
      echo "无法获取到指标,疑似组件故障,请确认并请检查监控脚本和运维平台配置"
      ;;
    4)
      echo "该组件进程为僵尸进程,请确认并请检查该组件状态"
      exit 3
      ;;
  esac

  exit 1
}

#######################################
# Test whether the first line of a string matches a grep pattern.
# Arguments:
#   $1 - target string (only its first line is examined)
#   $2 - pattern, interpreted by grep as a basic regular expression
# Returns:
#   0 if grep prints a non-empty match, 1 otherwise.
#######################################
contain() {
  local matched
  matched=$(printf '%s\n' "$1" | head -n 1 | grep -- "$2")
  [[ -n "$matched" ]]
}

#######################################
# Store the elapsed running time of process FLAGEPID in CURNUM.
# Uses the etime column of `ps -eo pid,etime`.
# Globals:
#   FLAGEPID (read), CURNUM (written; empty if the PID is not listed)
#######################################
getRuntime() {
  # The PID is passed with -v and compared to the pid column exactly.
  CURNUM=$(ps -eo pid,etime | awk -v pid="${FLAGEPID}" '$1 == pid { print $2 }')
}

#######################################
# Store the start time of process FLAGEPID in CURNUM.
# Uses the lstart column of `ps -eo pid,lstart`; the five lstart tokens
# (fields 2-6) are joined with single spaces.
# Globals:
#   FLAGEPID (read), CURNUM (written; empty if the PID is not listed)
#######################################
getStarttime() {
  CURNUM=$(ps -eo pid,lstart \
    | awk -v pid="${FLAGEPID}" '$1 == pid { print $2, $3, $4, $5, $6 }')
}

#######################################
# Store the memory share of process FLAGEPID in CURNUM.
# Sums column 4 of `ps aux` over the rows whose PID column equals FLAGEPID.
# Matching the PID column exactly keeps unrelated rows that merely contain the
# same digits (other PIDs, command lines) out of the sum.
# Globals:
#   FLAGEPID (read), CURNUM (written; 0 if the PID is not listed)
#######################################
getMemory() {
  CURNUM=$(ps aux \
    | awk -v pid="${FLAGEPID}" 'BEGIN { sum = 0 } $2 == pid { sum += $4 } END { print sum }')
}

#######################################
# Store the CPU usage of process FLAGEPID in CURNUM.
# Sums column 3 of `ps aux` over the rows whose PID column equals FLAGEPID.
# When CORENUM is non-zero the sum is divided by it and printed with two
# decimals; otherwise the raw sum is reported.
# The division is done in awk, so the result is rounded to two decimals.
# Globals:
#   FLAGEPID, CORENUM (read), CURNUM (written)
#######################################
getCPU() {
  CURNUM=$(ps aux | awk -v pid="${FLAGEPID}" -v cores="${CORENUM}" '
    BEGIN { sum = 0 }
    $2 == pid { sum += $3 }
    END {
      if (cores + 0 != 0) printf "%.2f", sum / cores
      else print sum
    }')
}

#######################################
# Detect a change in the second-to-last mongostat column between runs.
# The value seen on this run is saved in "${PORT}state.txt" and compared with
# the value saved by the previous run.
# NOTE(review): the column is presumably the replica-set member state, going
# by the function name; confirm against the mongostat version in use.
# Globals:
#   PORT, mongoStatStr (read)
#   CURNUM (written): 0 if unchanged or no previous value exists, 2 if changed
#######################################
getCopySet() {
  local state_file="${PORT}state.txt"
  local previous=""
  local current

  if [[ -f "$state_file" ]]; then
    previous=$(cat "$state_file")
  fi
  # NF > 1 guards against an empty line, where $(NF-1) would be invalid.
  current=$(printf '%s\n' "$mongoStatStr" | awk 'NF > 1 { print $(NF-1) }')
  echo "$current" > "$state_file"

  if [[ -z "$previous" || "$previous" == "$current" ]]; then
    CURNUM=0
  else
    CURNUM=2
  fi
}

#######################################
# Report whether rs.status() returns code 76.
# NOTE(review): 76 is presumably MongoDB's NoReplicationEnabled error code,
# i.e. the server is not part of a replica set; confirm.
# Globals:
#   mongoStr (read): mongo shell command line
#   CURNUM (written): 0 when the code is 76, 1 otherwise
#######################################
replset() {
  local code
  # mongoStr is expanded unquoted on purpose: it holds the command and its arguments.
  code=$($mongoStr --eval="rs.status().code")
  if [[ "$code" == "76" ]]; then
    CURNUM=0
  else
    CURNUM=1
  fi
}
#######################################
# Report whether db.isMaster().ismaster is true.
# Globals:
#   mongoStr (read): mongo shell command line
#   CURNUM (written): 1 when the shell prints "true", 0 otherwise
#######################################
repl_role() {
  local is_master
  # mongoStr is expanded unquoted on purpose: it holds the command and its arguments.
  is_master=$($mongoStr --eval="db.isMaster().ismaster")
  if [[ "$is_master" == "true" ]]; then
    CURNUM=1
  else
    CURNUM=0
  fi
}
# Store db.serverStatus().ok, as printed by the mongo shell, in CURNUM.
ok() {
  # mongoStr is expanded unquoted on purpose: it holds the command and its arguments.
  CURNUM=$($mongoStr --eval="db.serverStatus().ok")
}
# Store db.serverStatus().uptime, as printed by the mongo shell, in CURNUM.
# This function shadows the external `uptime` command inside the script.
uptime() {
  # mongoStr is expanded unquoted on purpose: it holds the command and its arguments.
  CURNUM=$($mongoStr --eval="db.serverStatus().uptime")
}
# Store db.serverStatus().version, as printed by the mongo shell, in CURNUM.
version() {
  # mongoStr is expanded unquoted on purpose: it holds the command and its arguments.
  CURNUM=$($mongoStr --eval="db.serverStatus().version")
}
# Store db.serverStatus().connections.current in CURNUM.
connections_current() {
  # mongoStr is expanded unquoted on purpose: it holds the command and its arguments.
  CURNUM=$($mongoStr --eval="db.serverStatus().connections.current")
}
# Store db.serverStatus().connections.available in CURNUM.
connections_available() {
  # mongoStr is expanded unquoted on purpose: it holds the command and its arguments.
  CURNUM=$($mongoStr --eval="db.serverStatus().connections.available")
}
# Store db.serverStatus().mem.bits in CURNUM.
mem_bits() {
  # mongoStr is expanded unquoted on purpose: it holds the command and its arguments.
  CURNUM=$($mongoStr --eval="db.serverStatus().mem.bits")
}
# Store db.serverStatus().mem.resident in CURNUM.
mem_resident() {
  # mongoStr is expanded unquoted on purpose: it holds the command and its arguments.
  CURNUM=$($mongoStr --eval="db.serverStatus().mem.resident")
}
# Store db.serverStatus().mem.virtual in CURNUM.
mem_virtual() {
  # mongoStr is expanded unquoted on purpose: it holds the command and its arguments.
  CURNUM=$($mongoStr --eval="db.serverStatus().mem.virtual")
}
#######################################
# Report whether db.serverStatus().mem.supported is true.
# Globals:
#   mongoStr (read): mongo shell command line
#   CURNUM (written): 0 when the shell prints "true", 1 otherwise
#     (including when the shell prints nothing)
#######################################
mem_supported() {
  local supported
  # mongoStr is expanded unquoted on purpose: it holds the command and its arguments.
  supported=$($mongoStr --eval="db.serverStatus().mem.supported")
  if [[ "$supported" == "true" ]]; then
    CURNUM=0
  else
    CURNUM=1
  fi
}
#######################################
# Store the index miss ratio in CURNUM.
# First asks for db.serverStatus().indexCounters; when the shell prints
# nothing for it, CURNUM is 0. Otherwise CURNUM is whatever the shell prints
# for db.serverStatus().indexCounters.missRatio.
# (An alternative path, indexCounters.btree.missRatio, was left commented out
# in the original.)
# Globals:
#   mongoStr (read), CURNUM (written)
#######################################
miss() {
  local counters
  # mongoStr is expanded unquoted on purpose: it holds the command and its arguments.
  counters=$($mongoStr --eval="db.serverStatus().indexCounters")
  if [[ -z "$counters" ]]; then
    CURNUM=0
  else
    #CURNUM=$($mongoStr --eval="db.serverStatus().indexCounters.btree.missRatio")
    CURNUM=$($mongoStr --eval="db.serverStatus().indexCounters.missRatio")
  fi
}

# Store db.serverStatus().mem.mapped in CURNUM.
mem_mapped() {
  # mongoStr is expanded unquoted on purpose: it holds the command and its arguments.
  CURNUM=$($mongoStr --eval="db.serverStatus().mem.mapped")
}
# Store db.serverStatus().mem.mappedWithJournal in CURNUM.
mem_mappedWithJournal() {
  # mongoStr is expanded unquoted on purpose: it holds the command and its arguments.
  CURNUM=$($mongoStr --eval="db.serverStatus().mem.mappedWithJournal")
}
#######################################
# Compute the per-second rate of db.serverStatus().network.bytesIn since the
# previous run.
# Globals:
#   mongoStr, PORT (read)
#   last_network_bytesIn_time, last_network_bytesIn_persecond (read):
#     previous sample time and previous counter value
#   CURNUM (written): rate with two decimals; 0 when no time has elapsed;
#     empty on the first run or when the counter cannot be read
# Outputs:
#   Appends "network_bytesIn <counter> <epoch>" to "${PORT}.txt".
#######################################
network_bytesIn_persecond() {
  local counter now
  # mongoStr is expanded unquoted on purpose: it holds the command and its arguments.
  # sed keeps only digits and dots (e.g. strips a NumberLong(...) wrapper).
  counter=$($mongoStr --eval="db.serverStatus().network.bytesIn" | sed 's/[^0-9.]//g')
  now=$(date +%s)
  CURNUM=
  # Without a reading there is nothing to record or compare; writing an empty
  # counter would shift the columns of the state line.
  [[ -n "$counter" ]] || return 0
  echo "network_bytesIn ${counter} ${now}" >> "${PORT}.txt"
  # First run: no previous sample, so no rate yet.
  [[ -n "${last_network_bytesIn_time}" ]] || return 0
  CURNUM=$(awk -v cur="$counter" -v prev="${last_network_bytesIn_persecond}" \
    -v t1="$now" -v t0="${last_network_bytesIn_time}" \
    'BEGIN { dt = t1 - t0; if (dt == 0) print 0; else printf "%.2f\n", (cur - prev) / dt }')
}
#######################################
# Compute the per-second rate of db.serverStatus().network.bytesOut since the
# previous run.
# Globals:
#   mongoStr, PORT (read)
#   last_network_bytesOut_time, last_network_bytesOut_persecond (read):
#     previous sample time and previous counter value
#   CURNUM (written): rate with two decimals; 0 when no time has elapsed;
#     empty on the first run or when the counter cannot be read
# Outputs:
#   Appends "network_bytesOut <counter> <epoch>" to "${PORT}.txt".
#######################################
network_bytesOut_persecond() {
  local counter now
  # mongoStr is expanded unquoted on purpose: it holds the command and its arguments.
  # sed keeps only digits and dots (e.g. strips a NumberLong(...) wrapper).
  counter=$($mongoStr --eval="db.serverStatus().network.bytesOut" | sed 's/[^0-9.]//g')
  now=$(date +%s)
  CURNUM=
  # Without a reading there is nothing to record or compare; writing an empty
  # counter would shift the columns of the state line.
  [[ -n "$counter" ]] || return 0
  echo "network_bytesOut ${counter} ${now}" >> "${PORT}.txt"
  # First run: no previous sample, so no rate yet.
  [[ -n "${last_network_bytesOut_time}" ]] || return 0
  CURNUM=$(awk -v cur="$counter" -v prev="${last_network_bytesOut_persecond}" \
    -v t1="$now" -v t0="${last_network_bytesOut_time}" \
    'BEGIN { dt = t1 - t0; if (dt == 0) print 0; else printf "%.2f\n", (cur - prev) / dt }')
}
#######################################
# Compute the per-second rate of db.serverStatus().network.numRequests since
# the previous run.
# Globals:
#   mongoStr, PORT (read)
#   last_network_numRequests_time, last_network_numRequests_persecond (read):
#     previous sample time and previous counter value
#   CURNUM (written): rate with two decimals; 0 when no time has elapsed;
#     empty on the first run or when the counter cannot be read
# Outputs:
#   Appends "network_numRequests <counter> <epoch>" to "${PORT}.txt".
#######################################
network_numRequests_persecond() {
  local counter now
  # mongoStr is expanded unquoted on purpose: it holds the command and its arguments.
  # sed keeps only digits and dots (e.g. strips a NumberLong(...) wrapper).
  counter=$($mongoStr --eval="db.serverStatus().network.numRequests" | sed 's/[^0-9.]//g')
  now=$(date +%s)
  CURNUM=
  # Without a reading there is nothing to record or compare; writing an empty
  # counter would shift the columns of the state line.
  [[ -n "$counter" ]] || return 0
  echo "network_numRequests ${counter} ${now}" >> "${PORT}.txt"
  # First run: no previous sample, so no rate yet.
  [[ -n "${last_network_numRequests_time}" ]] || return 0
  CURNUM=$(awk -v cur="$counter" -v prev="${last_network_numRequests_persecond}" \
    -v t1="$now" -v t0="${last_network_numRequests_time}" \
    'BEGIN { dt = t1 - t0; if (dt == 0) print 0; else printf "%.2f\n", (cur - prev) / dt }')
}
#######################################
# Compute the per-second rate of db.serverStatus().opcounters.insert since the
# previous run.
# Globals:
#   mongoStr, PORT (read)
#   last_opcounters_insert_time, last_opcounters_insert_persecond (read):
#     previous sample time and previous counter value
#   CURNUM (written): rate with two decimals; 0 when no time has elapsed;
#     empty on the first run or when the counter cannot be read
# Outputs:
#   Appends "opcounters_insert <counter> <epoch>" to "${PORT}.txt".
#######################################
opcounters_insert_persecond() {
  local counter now
  # mongoStr is expanded unquoted on purpose: it holds the command and its arguments.
  # The value is reduced to digits and dots, as the network_* collectors do,
  # so a wrapped value such as NumberLong(123) does not break the arithmetic.
  counter=$($mongoStr --eval="db.serverStatus().opcounters.insert" | sed 's/[^0-9.]//g')
  now=$(date +%s)
  CURNUM=
  # Without a reading there is nothing to record or compare; writing an empty
  # counter would shift the columns of the state line.
  [[ -n "$counter" ]] || return 0
  echo "opcounters_insert ${counter} ${now}" >> "${PORT}.txt"
  # First run: no previous sample, so no rate yet.
  [[ -n "${last_opcounters_insert_time}" ]] || return 0
  CURNUM=$(awk -v cur="$counter" -v prev="${last_opcounters_insert_persecond}" \
    -v t1="$now" -v t0="${last_opcounters_insert_time}" \
    'BEGIN { dt = t1 - t0; if (dt == 0) print 0; else printf "%.2f\n", (cur - prev) / dt }')
}
#######################################
# Compute the per-second rate of db.serverStatus().opcounters.query since the
# previous run.
# Globals:
#   mongoStr, PORT (read)
#   last_opcounters_query_time, last_opcounters_query_persecond (read):
#     previous sample time and previous counter value
#   CURNUM (written): rate with two decimals; 0 when no time has elapsed;
#     empty on the first run or when the counter cannot be read
# Outputs:
#   Appends "opcounters_query <counter> <epoch>" to "${PORT}.txt".
#######################################
opcounters_query_persecond() {
  local counter now
  # mongoStr is expanded unquoted on purpose: it holds the command and its arguments.
  # The value is reduced to digits and dots, as the network_* collectors do,
  # so a wrapped value such as NumberLong(123) does not break the arithmetic.
  counter=$($mongoStr --eval="db.serverStatus().opcounters.query" | sed 's/[^0-9.]//g')
  now=$(date +%s)
  CURNUM=
  # Without a reading there is nothing to record or compare; writing an empty
  # counter would shift the columns of the state line.
  [[ -n "$counter" ]] || return 0
  echo "opcounters_query ${counter} ${now}" >> "${PORT}.txt"
  # First run: no previous sample, so no rate yet.
  [[ -n "${last_opcounters_query_time}" ]] || return 0
  CURNUM=$(awk -v cur="$counter" -v prev="${last_opcounters_query_persecond}" \
    -v t1="$now" -v t0="${last_opcounters_query_time}" \
    'BEGIN { dt = t1 - t0; if (dt == 0) print 0; else printf "%.2f\n", (cur - prev) / dt }')
}
#######################################
# Compute the per-second rate of db.serverStatus().opcounters.update since the
# previous run.
# Globals:
#   mongoStr, PORT (read)
#   last_opcounters_update_time, last_opcounters_update_persecond (read):
#     previous sample time and previous counter value
#   CURNUM (written): rate with two decimals; 0 when no time has elapsed;
#     empty on the first run or when the counter cannot be read
# Outputs:
#   Appends "opcounters_update <counter> <epoch>" to "${PORT}.txt".
#######################################
opcounters_update_persecond() {
  local counter now
  # mongoStr is expanded unquoted on purpose: it holds the command and its arguments.
  # The value is reduced to digits and dots, as the network_* collectors do,
  # so a wrapped value such as NumberLong(123) does not break the arithmetic.
  counter=$($mongoStr --eval="db.serverStatus().opcounters.update" | sed 's/[^0-9.]//g')
  now=$(date +%s)
  CURNUM=
  # Without a reading there is nothing to record or compare; writing an empty
  # counter would shift the columns of the state line.
  [[ -n "$counter" ]] || return 0
  echo "opcounters_update ${counter} ${now}" >> "${PORT}.txt"
  # First run: no previous sample, so no rate yet.
  [[ -n "${last_opcounters_update_time}" ]] || return 0
  CURNUM=$(awk -v cur="$counter" -v prev="${last_opcounters_update_persecond}" \
    -v t1="$now" -v t0="${last_opcounters_update_time}" \
    'BEGIN { dt = t1 - t0; if (dt == 0) print 0; else printf "%.2f\n", (cur - prev) / dt }')
}
#######################################
# Compute the per-second rate of db.serverStatus().opcounters.delete since the
# previous run.
# Globals:
#   mongoStr, PORT (read)
#   last_opcounters_delete_time, last_opcounters_delete_persecond (read):
#     previous sample time and previous counter value
#   CURNUM (written): rate with two decimals; 0 when no time has elapsed;
#     empty on the first run or when the counter cannot be read
# Outputs:
#   Appends "opcounters_delete <counter> <epoch>" to "${PORT}.txt".
#######################################
opcounters_delete_persecond() {
  local counter now
  # mongoStr is expanded unquoted on purpose: it holds the command and its arguments.
  # The value is reduced to digits and dots, as the network_* collectors do,
  # so a wrapped value such as NumberLong(123) does not break the arithmetic.
  counter=$($mongoStr --eval="db.serverStatus().opcounters.delete" | sed 's/[^0-9.]//g')
  now=$(date +%s)
  CURNUM=
  # Without a reading there is nothing to record or compare; writing an empty
  # counter would shift the columns of the state line.
  [[ -n "$counter" ]] || return 0
  echo "opcounters_delete ${counter} ${now}" >> "${PORT}.txt"
  # First run: no previous sample, so no rate yet.
  [[ -n "${last_opcounters_delete_time}" ]] || return 0
  CURNUM=$(awk -v cur="$counter" -v prev="${last_opcounters_delete_persecond}" \
    -v t1="$now" -v t0="${last_opcounters_delete_time}" \
    'BEGIN { dt = t1 - t0; if (dt == 0) print 0; else printf "%.2f\n", (cur - prev) / dt }')
}



#######################################
# Collect the basic facts the other collectors depend on.
# Globals read:
#   BASEDIR, MONGOHOST, PORT, USER, PASSWORD
# Globals written:
#   CORENUM      - number of "physical id" lines in /proc/cpuinfo (0 if none)
#   FLAGEPID     - PID of the mongod listening on PORT, taken from netstat
#   VERSION      - 3 or 2 when the first line of `mongostat --version`
#                  matches, otherwise that raw line
#   user, password - copies of USER and PASSWORD
#   mongoStatStr - last line of one mongostat sample
#   mongoStr     - mongo shell command line used by the other collectors
#   HASBASEDIR   - set to 0 when mongostat produced no output
# Returns:
#   Always 0; an empty mongoStatStr is how a failure shows up.
# Note: credentials are passed as command-line arguments, so they are visible
# in the process list while the tools run.
#######################################
getMongoDBStat() {
  # Strip one trailing slash so BASEDIR works with or without it.
  local bin_dir="${BASEDIR%/}"
  local version_line

  CORENUM=$(grep -c "physical id" /proc/cpuinfo 2>/dev/null)
  CORENUM=${CORENUM:-0}

  # The last netstat field is "PID/program"; keep the PID of the first match.
  FLAGEPID=$(netstat -lnp | grep -w -- "${PORT}" | grep /mongod \
    | awk '{ print $NF }' | awk -F '/' '{ print $1 }' | head -n 1)

  version_line=$("${bin_dir}/mongostat" --version | head -n 1)
  if [[ "$version_line" == *:* && "$version_line" == *3* ]]; then
    VERSION=3
  elif [[ "$version_line" == *2* ]]; then
    VERSION=2
  else
    VERSION=$version_line
  fi

  user=$USER
  password=$PASSWORD

  if [[ -n "$USER" ]]; then
    mongoStatStr=$("${bin_dir}/mongostat" -u "$USER" -p "$PASSWORD" \
      --host "$MONGOHOST" --port "$PORT" -n 1 --noheaders \
      --authenticationDatabase=admin | tail -n 1)
    mongoStr="${bin_dir}/mongo ${MONGOHOST}:${PORT}/admin -u ${user} -p ${password} --quiet"
  else
    mongoStatStr=$("${bin_dir}/mongostat" --host "$MONGOHOST" --port "$PORT" \
      -n 1 --noheaders 2>/dev/null | tail -n 1)
    mongoStr="${bin_dir}/mongo --host ${MONGOHOST} --port ${PORT} --quiet"
  fi

  if [[ -z "$mongoStatStr" ]]; then
    HASBASEDIR=0
  fi
  return 0
}

# Print field number $1 of the mongostat sample line held in mongoStatStr.
# The line is always passed quoted so tokens containing glob characters
# (for example "*0") are never expanded against the current directory.
_mongostatField() {
  printf '%s\n' "$mongoStatStr" | awk -v col="$1" '{ print $col }'
}

#######################################
# Collect every metric and append each one to DataResultStr.
# Each metric is produced by setting CURNUM (directly or through a collector
# function) and then calling parseFileds / parseTimeFileds with its label.
# Globals read:
#   mongoStatStr, VERSION, BASEDIR, PORT, MONGOHOST, SLOW_QUERY_MIN,
#   SLOW_QUERY_MAX, THRESHOLD, FLAGEPID
# Globals written:
#   CURNUM, DataResultStr (through parseFileds), FAULTS_INDEX, RES_INDEX,
#   LOCKTIME_INDEX, CONN_INDEX, QRW_INDEX, MISS_INDEX
# Calls gotErr 3 (which exits) when the number of mongostat columns is not one
# of the known layouts.
#######################################
analysisStat() {
  local column_size conn_col qrw_col col label entry
  local slow_query_script slow_result slow_rc

  column_size=$(printf '%s\n' "$mongoStatStr" | awk '{ print NF }')

  # Column positions differ between mongostat output layouts.
  case "$column_size" in
    18)
      FAULTS_INDEX=10
      RES_INDEX=9
      LOCKTIME_INDEX=11
      CONN_INDEX=17
      QRW_INDEX=13
      MISS_INDEX=12
      ;;
    19 | 20 | 21)
      FAULTS_INDEX=11
      RES_INDEX=10
      LOCKTIME_INDEX=12
      CONN_INDEX=18
      QRW_INDEX=14
      MISS_INDEX=13
      ;;
    17)
      FAULTS_INDEX=11
      RES_INDEX=10
      LOCKTIME_INDEX=0
      CONN_INDEX=16
      QRW_INDEX=12
      MISS_INDEX=0
      ;;
    *)
      echo "列头不匹配,请扩充列头下标代码" 1>&2
      gotErr 3
      ;;
  esac

  getMemory
  parseFileds "Memory"

  # Connection count and queue columns are chosen by VERSION, not by the
  # *_INDEX values above. A non-numeric VERSION takes the ">= 3" positions.
  if [[ "$VERSION" =~ ^[0-9]+$ ]] && (( VERSION < 3 )); then
    conn_col=18
    qrw_col=14
  else
    conn_col=16
    qrw_col=12
  fi

  CURNUM=$(_mongostatField "$conn_col" | sed 's/[^0-9.]//g')
  parseFileds "ConnNum"

  # The queue column has the form "<readers>|<writers>".
  CURNUM=$(_mongostatField "$qrw_col" | awk -F '|' '{ print $1 }')
  parseFileds "QR"

  CURNUM=$(_mongostatField "$qrw_col" | awk -F '|' '{ print $2 }')
  parseFileds "QW"

  # Slow-query helper script located next to this script.
  slow_query_script="$(cd "$(dirname "$0")" && pwd)/check_mongo_slow_query.sh"
  slow_result=$("$slow_query_script" -f "$BASEDIR" -P "$PORT" -H "$MONGOHOST" \
    -w "$SLOW_QUERY_MIN" -c "$SLOW_QUERY_MAX" -t "${THRESHOLD}")
  slow_rc=$?

  # Exit codes 0 and 3 report 0 slow queries; any other code carries the count
  # as the text before the "<<COUNT>>" marker in the helper's output.
  CURNUM=0
  if (( slow_rc != 0 && slow_rc != 3 )); then
    CURNUM=$(printf '%s' "$slow_result" | tr '\n' ' ' \
      | awk -F '<<COUNT>>' '{ print $1 }')
    if [[ -z "$CURNUM" ]]; then
      CURNUM=0
    fi
  fi
  parseFileds "MongoDB慢查询"

  # The first four mongostat columns are the insert/query/update/delete rates.
  col=0
  for label in insert query update delete; do
    col=$((col + 1))
    CURNUM=$(_mongostatField "$col" | sed 's/[^0-9.]//g')
    parseFileds "$label"
  done

  CURNUM=$(_mongostatField "$FAULTS_INDEX" | sed 's/[^0-9]//g')
  parseFileds "FAULTSNum"

  # NOTE(review): for the 17-column layout LOCKTIME_INDEX is 0, so awk's $0
  # (the whole line) is parsed here; confirm whether that is intended.
  CURNUM=$(_mongostatField "$LOCKTIME_INDEX" | awk -F ':' '{ print $2 }' \
    | sed 's/[^0-9.]//g')
  parseFileds "LOCKTIME"

  # Remaining metrics, as "<collector function>:<reported label>".
  for entry in \
    miss:miss \
    getCopySet:COPY_SET \
    replset:replset \
    repl_role:repl_role \
    ok:ok \
    uptime:uptime \
    version:version \
    connections_current:connections_current \
    connections_available:connections_available \
    mem_bits:mem_bits \
    mem_resident:mem_resident \
    mem_virtual:mem_virtual \
    mem_supported:mem_supported \
    mem_mapped:mem_mapped \
    mem_mappedWithJournal:mem_mappedWithJournal \
    network_bytesIn_persecond:network_bytesIn_persecond \
    network_bytesOut_persecond:network_bytesOut_persecond \
    network_numRequests_persecond:network_numRequests_persecond \
    opcounters_insert_persecond:opcounters_Insert_persecond \
    opcounters_query_persecond:opcounters_query_persecond \
    opcounters_update_persecond:opcounters_Update_persecond \
    opcounters_delete_persecond:opcounters_Delete_persecond; do
    "${entry%%:*}"
    parseFileds "${entry#*:}"
  done

  CURNUM=$FLAGEPID
  parseTimeFileds "FLAGEPID"

  getRuntime
  parseTimeFileds "RUNTIME"

  getStarttime
  parseTimeFileds "STARTTIME"
}
#######################################
# Append the current metric to DataResultStr as 'label':'value'.
# Arguments:
#   $1 - label under which CURNUM is reported
# Globals:
#   CURNUM (read), DataResultStr (appended to; entries are comma-separated)
#######################################
parseTimeFileds() {
  # ${var:+...} emits the existing content plus a comma only when non-empty.
  DataResultStr="${DataResultStr:+${DataResultStr},}'$1':'${CURNUM}'"
}

#######################################
# Append the current metric to DataResultStr as 'label':'value'.
# The original header mentions warning and error limits (WARNLIMIT,
# ERRORLIMIT), but no threshold comparison is performed here.
# Arguments:
#   $1 - label under which CURNUM is reported
# Globals:
#   CURNUM (read), DataResultStr (appended to; entries are comma-separated)
#######################################
parseFileds() {
  # ${var:+...} emits the existing content plus a comma only when non-empty.
  DataResultStr="${DataResultStr:+${DataResultStr},}'$1':'${CURNUM}'"
}

# Finish the run by handing code 2 to gotErr.
analysisResult() {
  gotErr 2
}

#######################################
# Print the usage text and exit with status 1.
# Arguments:
#   $1 - optional name of a missing input; when given, a line asking for it
#        is printed before the usage text
#######################################
showHelp() {
  if [[ -n "$1" ]]; then
    echo "请输入$1"
  fi
  echo "check_mongodb.sh 可以监听本地的mongodb的状态 参数如下"
  echo "check_mongodb.sh [-u <str>] [-p <str>] [-b <str>] [-P <str>] [-H <str>] [-t <str>]"
  echo "-u 表示用户名 可以不写"
  echo "-p 表示密码 可以不写"
  echo "-P 表示端口号"
  # -H is accepted by the option parser, so it is listed here as well.
  echo "-H 表示主机地址 默认为127.0.0.1"
  echo "-b 表示基础路径 默认为/usr/local/mongodb/bin/"
  echo "-t 表示设置记录MongoDB慢查询语句的时间阈值"
  exit 1
}

#######################################
# Parse the command-line options into the global settings.
# Options:
#   -u USER   -p PASSWORD   -P PORT   -H MONGOHOST   -b BASEDIR   -t THRESHOLD
#   -h or any unknown option: showHelp
# Arguments:
#   the script's positional parameters
#######################################
parseArgs() {
  # OPTIND is local so the function can be called more than once.
  local opt OPTIND=1
  while getopts "u:p:b:P:H:t:h" opt; do
    case "$opt" in
      h)
        showHelp
        ;;
      u)
        USER=$OPTARG
        ;;
      p)
        # password
        PASSWORD=$OPTARG
        ;;
      P)
        PORT=$OPTARG
        ;;
      H)
        MONGOHOST=$OPTARG
        ;;
      b)
        BASEDIR=$OPTARG
        ;;
      t)
        THRESHOLD=$OPTARG
        ;;
      *)
        # getopts reports unknown options and missing arguments as "?".
        showHelp
        ;;
    esac
  done
}

parseArgs "$@"


#######################################
# Clear credentials that still hold their template placeholders.
# USER is emptied when it equals "<<username>>" and PASSWORD when it equals
# "<<password>>"; any other value is left untouched.
# Globals:
#   USER, PASSWORD (read, possibly cleared)
#######################################
parm() {
  if [[ "$USER" == "<<username>>" ]]; then
    USER=
  fi

  if [[ "$PASSWORD" == "<<password>>" ]]; then
    PASSWORD=
  fi
}

# Main body of the plugin:
#   parm           - clear placeholder credentials
#   initLast       - load the previous run's samples and reset the state file
#   isAlive        - confirm the service is alive
#   getMongoDBStat - start: gather PID, version and the mongostat sample
#   analysisStat   - analyse: collect every metric
#   analysisResult - hand the collected result to gotErr
parm
initLast
isAlive
getMongoDBStat
analysisStat
analysisResult

# LEAVE A COMMENT
#
# Captcha Code