check_zookeeper.sh查看zookeeper信息

  sre

#! /bin/bash
# Global configuration and state shared by every function in this script.
export LANG=en_US.UTF-8

#WARNLIMIT=-1      # warning threshold: values above it should raise a warning
#ERRORLIMIT=-1     # error threshold: values above it are treated as errors

# ZooKeeper client port to monitor (set with -p).
PORT=

# Host used for the four-letter-word queries (set with -I).
IP=localhost
FLAGE=
# PID of the process listening on PORT (filled in by isAlive).
FLAGEID=
# whether to output FLAGPID
#FLAGEOPID=1
# whether to output RUNTIME
#RUNTIME=1
# whether to output STARTTIME
STARTTIME=0
VERBOSE=0

QUOTASTR=
quota=
zookeeper_acquisition_json=""
CORENUM=1

keys=(FLAGEPID Memory CPU RUNTIME STARTTIME ConnectionNum Thread ClusterState zkState bytesReceived
bytesSent zkConnections latencyAvg latencyMin latencyMax znodeCount outstandingRequests packetsReceived packetsSent)
# echo ${#keys[@]} shows how many metrics there are (cpname not included)
declare -A map=()

# Pre-seed every metric with an empty value so the final report always
# contains all keys, even those that were never collected.
for var in "${keys[@]}"; do
    map[$var]=""
done

# Print the diagnostic that belongs to a status code and terminate the script.
# Arguments: $1 - status code (0 = recovered, 3 = PID not found, 4 = zombie)
# Outputs:   one diagnostic line on stdout for the codes listed above
# Exits:     with $1, except for code 4 which exits with 3
function gotErr() {
    local code="${1:-1}"

    case "$code" in
        0)
            # NOTE(review): DataResultStr is not set anywhere in the visible
            # part of the file; presumably the caller or platform provides it.
            echo "mailstatedes=各项指标恢复正常 statedes=各项指标恢复正常|${DataResultStr}"
            ;;
        3)
            echo "无法获取到组件PID,疑似组件故障,请确认并请检查监控脚本和运维平台配置"
            ;;
        4)
            echo "该组件进程为僵尸进程,请确认并请检查该组件状态"
            exit 3
            ;;
    esac

    exit "$code"
}

# Memory usage: sum of the %MEM column (4th field of `ps aux`) for the
# monitored PID. The PID column is compared exactly, so other processes whose
# PID or command line merely contains the same digits are not counted.
# Globals: FLAGEID (read); CURNUM, zookeeper_acquisition_json, map (written)
function getMemory() {
    CURNUM=$(ps aux | awk -v pid="$FLAGEID" '$2 == pid { sum += $4 } END { print sum + 0 }')
    zookeeper_acquisition_json="$zookeeper_acquisition_json,'Memory':$CURNUM"
    map["Memory"]="$CURNUM"
}


# CPU usage: sum of the %CPU column (3rd field of `ps aux`) for the monitored
# PID, divided by CORENUM when CORENUM is non-zero and printed with two
# decimals. The division happens inside awk, so `bc` is not required and an
# empty CORENUM no longer breaks the numeric test (awk rounds, bc truncated).
# Globals: FLAGEID, CORENUM (read); CURNUM, zookeeper_acquisition_json, map (written)
function getCPU() {
    CURNUM=$(ps aux | awk -v pid="$FLAGEID" -v cores="$CORENUM" '
        $2 == pid { sum += $3 }
        END {
            if (cores + 0 != 0)
                printf "%.2f\n", sum / cores
            else
                print sum + 0
        }')
    zookeeper_acquisition_json="$zookeeper_acquisition_json,'CPU':$CURNUM"
    map["CPU"]="$CURNUM"
}

# Elapsed running time of the monitored process, taken from the etime column
# of `ps -eo pid,etime`. The PID is handed to awk with -v instead of being
# spliced into the program text, so an empty PID cannot break the awk syntax.
# Globals: FLAGEID (read); CURNUM, zookeeper_acquisition_json, map (written)
function getRuntime() {
    CURNUM=$(ps -eo pid,etime | awk -v pid="$FLAGEID" '$1 == pid { print $2 }')
    zookeeper_acquisition_json="$zookeeper_acquisition_json,'RUNTIME':'$CURNUM'"
    map["RUNTIME"]="$CURNUM"
}

# Start time of the monitored process: the five lstart fields printed by
# `ps -eo pid,lstart`, joined with single spaces.
# Globals: FLAGEID (read); CURNUM, zookeeper_acquisition_json, map (written)
function getStarttime() {
    CURNUM=$(ps -eo pid,lstart | awk -v pid="$FLAGEID" '$1 == pid { print $2, $3, $4, $5, $6 }')
    zookeeper_acquisition_json="$zookeeper_acquisition_json,'STARTTIME':'$CURNUM'"
    map["STARTTIME"]="$CURNUM"
}

# Number of ESTABLISHED connections: counts the `netstat -apn` lines that
# contain the monitored PID as a whole word and the ESTABLISHED state.
# Globals: FLAGEID (read); CURNUM (written)
function getConnNum() {
    # grep -c prints the count directly (0 when nothing matches).
    CURNUM=$(sudo netstat -apn | grep -w -- "$FLAGEID" | grep -c ESTABLISHED)
}

# Thread count of the monitored PID: number of lines printed by `ps -mp PID`
# minus two, clamped at zero so that a vanished process does not yield a
# negative count.
# NOTE(review): the "minus two" presumably skips the header and the process
# summary line - confirm against the local ps implementation.
# Globals: FLAGEID (read); CURNUM (written)
function getThreadNum() {
    CURNUM=$(ps -mp "$FLAGEID" | awk 'END { n = NR - 2; print (n > 0 ? n : 0) }')
}

# Cluster state.
# Runs zookeeper_monitor.jar (expected next to this script) against ServerAddr
# and inspects its output: the output is split on commas/whitespace and the
# text before the first '-' of every entry is treated as that entry's state.
# If any state equals 2, CURNUM is set to 2 and the raw output is appended to
# ErrorResultStr; otherwise CURNUM is 0.
# Globals: ServerAddr (read); zookeeperMonitor, monitorDate, CURNUM,
#          ErrorResultStr (written)
function getClusterState() {
    local entry
    local -a entries=()

    zookeeperMonitor="$(cd "$(dirname "$0")" && pwd)/zookeeper_monitor.jar"
    # stderr is discarded on purpose: only the report on stdout is parsed.
    monitorDate=$(java -jar "$zookeeperMonitor" "$ServerAddr" 2>/dev/null)

    # read returns non-zero at end of input even though the array is filled.
    IFS=$', \t\n' read -r -d '' -a entries <<< "$monitorDate" || true

    for entry in "${entries[@]}"; do
        if [[ "${entry%%-*}" == "2" ]]; then
            CURNUM=2
            ErrorResultStr="${ErrorResultStr}${monitorDate}"
            return
        fi
    done
    CURNUM=0
}

# Fetch the status overview: send the "mntr" command to IP:PORT through nc
# and save the reply to mntr.txt in the current directory, where the other
# metric functions read it.
# Globals: IP, PORT (read)
function getMntr() {
    printf 'mntr\n' | nc "$IP" "$PORT" > mntr.txt
}

# Leader/follower transition since the previous run.
#   0 - role unchanged (or the current/previous role is unknown)
#   2 - the role changed and the node is now a follower
#   3 - the role changed and the node is now something else (e.g. leader)
# The current role is the last field of the zk_server_state line in mntr.txt.
# The previous role is kept in a state file; its path defaults to
# /root/zkState.txt and can be overridden with ZK_STATE_FILE.
# An empty reading no longer overwrites the remembered role, so a failed
# query does not hide a later transition.
# Globals: ZK_STATE_FILE (read, optional); state, state1, CURNUM (written)
function getzkState() {
    local state_file="${ZK_STATE_FILE:-/root/zkState.txt}"

    # stderr is silenced on purpose: a missing mntr.txt means "role unknown".
    state=$(awk '/zk_server_state/ { print $NF; exit }' mntr.txt 2>/dev/null)

    state1=""
    if [[ -r "$state_file" ]]; then
        state1=$(<"$state_file")
    fi

    if [[ -n "$state" ]]; then
        printf '%s\n' "$state" > "$state_file"
    fi

    if [[ -z "$state" || -z "$state1" || "$state" == "$state1" ]]; then
        CURNUM=0
    elif [[ "$state" == "follower" ]]; then
        CURNUM=2
    else
        CURNUM=3
    fi
}

# Total received across all client connections: sums every "recved=<n>" value
# in the reply to the "cons" command and divides the sum by 1024*1024,
# rounded to an integer. Falls back to 0 when nothing could be read.
# Globals: IP, PORT (read); CURNUM (written)
function getBytesReceived() {
    CURNUM=$(printf 'cons\n' | nc "${IP}" "${PORT}" \
        | grep -oE 'recved=[0-9]+' \
        | awk -F= '{ total += $2 } END { printf "%.0f\n", total / 1024 / 1024 }')
    CURNUM=${CURNUM:-0}
}

# Total sent across all client connections: sums every "sent=<n>" value in
# the reply to the "cons" command and divides the sum by 1024*1024, rounded
# to an integer. Falls back to 0 when nothing could be read.
# Globals: IP, PORT (read); CURNUM (written)
function getBytesSent() {
    CURNUM=$(printf 'cons\n' | nc "${IP}" "${PORT}" \
        | grep -oE 'sent=[0-9]+' \
        | awk -F= '{ total += $2 } END { printf "%.0f\n", total / 1024 / 1024 }')
    CURNUM=${CURNUM:-0}
}

# Total number of client connections: value of the zk_num_alive_connections
# line in mntr.txt, reduced to digits and dots. The key is matched exactly so
# that longer keys sharing the same prefix are not picked up.
# Globals: CURNUM (written, 0 when the key or the file is missing)
function getzkConnections() {
    # stderr is silenced on purpose: a missing mntr.txt simply yields 0.
    CURNUM=$(awk '$1 == "zk_num_alive_connections" { gsub(/[^0-9.]/, "", $2); print $2; exit }' mntr.txt 2>/dev/null)
    CURNUM=${CURNUM:-0}
}

# Average time the server takes to answer client requests: value of the
# zk_avg_latency line in mntr.txt, reduced to digits and dots. The key is
# matched exactly so that similarly named keys are not picked up.
# Globals: CURNUM (written, 0 when the key or the file is missing)
function getlatencyAvg() {
    # stderr is silenced on purpose: a missing mntr.txt simply yields 0.
    CURNUM=$(awk '$1 == "zk_avg_latency" { gsub(/[^0-9.]/, "", $2); print $2; exit }' mntr.txt 2>/dev/null)
    CURNUM=${CURNUM:-0}
}

# Maximum time the server took to answer a client request: value of the
# zk_max_latency line in mntr.txt, reduced to digits and dots. The key is
# matched exactly so that similarly named keys are not picked up.
# Globals: CURNUM (written, 0 when the key or the file is missing)
function getlatencyMax() {
    # stderr is silenced on purpose: a missing mntr.txt simply yields 0.
    CURNUM=$(awk '$1 == "zk_max_latency" { gsub(/[^0-9.]/, "", $2); print $2; exit }' mntr.txt 2>/dev/null)
    CURNUM=${CURNUM:-0}
}

# Minimum time the server took to answer a client request: value of the
# zk_min_latency line in mntr.txt, reduced to digits and dots. The key is
# matched exactly so that similarly named keys are not picked up.
# Globals: CURNUM (written, 0 when the key or the file is missing)
function getlatencyMin() {
    # stderr is silenced on purpose: a missing mntr.txt simply yields 0.
    CURNUM=$(awk '$1 == "zk_min_latency" { gsub(/[^0-9.]/, "", $2); print $2; exit }' mntr.txt 2>/dev/null)
    CURNUM=${CURNUM:-0}
}

# Number of znodes: value of the zk_znode_count line in mntr.txt, reduced to
# digits and dots. The key is matched exactly so that similarly named keys
# are not picked up.
# Globals: CURNUM (written, 0 when the key or the file is missing)
function getznodeCount() {
    # stderr is silenced on purpose: a missing mntr.txt simply yields 0.
    CURNUM=$(awk '$1 == "zk_znode_count" { gsub(/[^0-9.]/, "", $2); print $2; exit }' mntr.txt 2>/dev/null)
    CURNUM=${CURNUM:-0}
}

# Number of queued requests the server has not yet processed: value of the
# zk_outstanding_requests line in mntr.txt, reduced to digits and dots. The
# key is matched exactly so that similarly named keys are not picked up.
# Globals: CURNUM (written, 0 when the key or the file is missing)
function getoutstandingRequests() {
    # stderr is silenced on purpose: a missing mntr.txt simply yields 0.
    CURNUM=$(awk '$1 == "zk_outstanding_requests" { gsub(/[^0-9.]/, "", $2); print $2; exit }' mntr.txt 2>/dev/null)
    CURNUM=${CURNUM:-0}
}

# Number of packets received: value of the zk_packets_received line in
# mntr.txt, reduced to digits and dots. The key is matched exactly so that
# similarly named keys are not picked up.
# Globals: CURNUM (written, 0 when the key or the file is missing)
function getpacketsReceived() {
    # stderr is silenced on purpose: a missing mntr.txt simply yields 0.
    CURNUM=$(awk '$1 == "zk_packets_received" { gsub(/[^0-9.]/, "", $2); print $2; exit }' mntr.txt 2>/dev/null)
    CURNUM=${CURNUM:-0}
}

# Number of packets sent: value of the zk_packets_sent line in mntr.txt,
# reduced to digits and dots. The key is matched exactly so that similarly
# named keys are not picked up.
# Globals: CURNUM (written, 0 when the key or the file is missing)
function getpacketsSent() {
    # stderr is silenced on purpose: a missing mntr.txt simply yields 0.
    CURNUM=$(awk '$1 == "zk_packets_sent" { gsub(/[^0-9.]/, "", $2); print $2; exit }' mntr.txt 2>/dev/null)
    CURNUM=${CURNUM:-0}
}
# Check that the service is alive.
# Looks up the PID that listens on PORT; calls `gotErr 3` when none is found
# and `gotErr 4` when that PID is a zombie. On success the PID is stored in
# FLAGEID and used to start the JSON fragment and the FLAGEPID map entry.
# CORENUM is set to the number of "physical id" lines in /proc/cpuinfo
# (0 when the file cannot be read).
# Globals: PORT (read); CORENUM, FLAGEID, ZOMBIE,
#          zookeeper_acquisition_json, map (written)
function isAlive() {
    CORENUM=$(grep -c "physical id" /proc/cpuinfo 2>/dev/null)
    CORENUM=${CORENUM:-0}

    # Match the port at the end of the local-address column of a LISTEN line
    # and take the numeric part of "PID/program"; a "-" owner (insufficient
    # privileges) is treated as "not found".
    FLAGEID=$(netstat -lnp | awk -v port="$PORT" '
        $6 == "LISTEN" && $4 ~ (":" port "$") {
            split($7, owner, "/")
            if (owner[1] ~ /^[0-9]+$/) { print owner[1]; exit }
        }')
    if [[ -z "$FLAGEID" ]]; then
        gotErr 3
    fi

    # Exact PID comparison: a zombie whose PID merely contains the same
    # digits must not be mistaken for the monitored process.
    ZOMBIE=$(ps -A -ostat,pid | awk -v pid="$FLAGEID" '$1 ~ /^[Zz]/ && $2 == pid')
    if [[ -n "$ZOMBIE" ]]; then
        gotErr 4
    fi

    zookeeper_acquisition_json="'cpname':'zookeeper','FLAGEPID':'$FLAGEID'"
    map["FLAGEPID"]="$FLAGEID"
}

# Private helper: store the value currently held in CURNUM under metric
# name $1, both in the JSON fragment and in the result map.
function _recordMetric() {
    zookeeper_acquisition_json="$zookeeper_acquisition_json,'$1':$CURNUM"
    map["$1"]="$CURNUM"
}

# Collect every metric.
# Process-level metrics are always gathered. The cluster state is gathered
# only when ServerAddr is set to something other than the "<<cluster_ip>>"
# placeholder. The remaining metrics are skipped when IP is empty or still
# the "<<ip>>" placeholder.
# Globals: ServerAddr, IP (read); CURNUM, zookeeper_acquisition_json,
#          map (written through the getters and _recordMetric)
function analysisStat() {
    local spec

    getStarttime
    getRuntime
    getMemory
    getCPU

    if [[ -n "$ServerAddr" && "$ServerAddr" != "<<cluster_ip>>" ]]; then
        getClusterState
        _recordMetric ClusterState
    fi

    if [[ -z "$IP" || "$IP" == "<<ip>>" ]]; then
        return
    fi

    getConnNum
    _recordMetric ConnectionNum

    getThreadNum
    _recordMetric Thread

    # Fetch the status overview once; the getters below read mntr.txt.
    getMntr

    # "metric:getter" pairs, evaluated in the order they must be reported.
    for spec in \
        zkState:getzkState \
        bytesReceived:getBytesReceived \
        bytesSent:getBytesSent \
        zkConnections:getzkConnections \
        latencyAvg:getlatencyAvg \
        latencyMin:getlatencyMin \
        latencyMax:getlatencyMax \
        znodeCount:getznodeCount \
        outstandingRequests:getoutstandingRequests \
        packetsReceived:getpacketsReceived \
        packetsSent:getpacketsSent; do
        "${spec#*:}"
        _recordMetric "${spec%%:*}"
    done
}

# Print the result: one JSON-like object on a single line containing every
# name in keys with its value from map, followed by "cpname":"zookeeper".
# Globals: keys, map (read)
# Outputs: the object followed by a newline on stdout
function analysisResult() {
    local key value
    local sq="'" dq='"'

    printf '{'
    for key in "${keys[@]}"; do
        # Single quotes in a value are turned into double quotes, exactly as
        # the original substitution did.
        # NOTE(review): a value containing quotes would produce invalid
        # JSON - confirm what the consumer expects.
        value=${map[$key]//$sq/$dq}
        printf '"%s":"%s",' "$key" "$value"
    done
    printf '"cpname":"zookeeper"}\n'
}

# Print an optional hint followed by the usage text, then exit the script.
# Arguments: $1 - optional name of the missing item; when non-empty a prompt
#                 line naming it is printed first
# Exits:     always with status 1
function showHelp() {
    if [[ -n "$1" ]]; then
        echo "请输入$1"
    fi
    printf '%s\n' \
        "check_zookeeper.sh 可以监听本地的zookeeper的状态 参数如下" \
        "check_zookeeper.sh [-p <port>]" \
        "-I 表示主机IP" \
        "-C 表示zookeeper集群服务地址" \
        "-p <port> 特征字符串 表示监控哪个zookeeper端口"
    exit 1
}


# Parse the command line:
#   -I <ip>    host IP used for the status queries
#   -p <port>  ZooKeeper port to monitor (required)
#   -C <addr>  ZooKeeper cluster service address
#   -h         show the usage text
while getopts "I:p:C:h" arg; do
    case "$arg" in
        I)
            IP=$OPTARG
            ;;
        p)
            PORT=$OPTARG
            ;;
        C)
            ServerAddr=$OPTARG
            ;;
        h)
            showHelp
            ;;
        *)
            # getopts reports unknown options and missing arguments as "?".
            showHelp
            ;;
    esac
done

# The port is mandatory: without it the process cannot be located.
if [[ -z "$PORT" ]]; then
    showHelp "特征项"
fi

# Main part of the plugin:
# make sure the service is alive, collect the metrics, print the report.

isAlive
analysisStat
analysisResult

LEAVE A COMMENT

Captcha Code