#! /bin/bash
export LANG=en_US.UTF-8

# Alert thresholds (currently disabled).
#WARNLIMIT=-1      # warning threshold: a value above it should raise a warning
#ERRORLIMIT=-1     # error threshold: a value above it is treated as an error

# Port of the monitored ZooKeeper instance; filled in from the -p option.
PORT=""

# Host queried with nc; can be overridden with the -I option.
IP="localhost"
FLAGE=""
# PID of the process listening on PORT; filled in by isAlive.
FLAGEID=""
# Whether to output FLAGPID
#FLAGEOPID=1
# Whether to output RUNTIME
#RUNTIME=1
# Whether to output STARTTIME
STARTTIME=0
VERBOSE=0

QUOTASTR=""
quota=""
# Accumulates the collected metrics as ,'name':value fragments.
zookeeper_acquisition_json=""
# Divisor applied to the summed CPU percentage; recomputed by isAlive.
CORENUM=1

# Names of every metric the plugin reports, in output order ("cpname" is
# emitted separately and is not part of this list).
keys=(
    FLAGEPID Memory CPU RUNTIME STARTTIME ConnectionNum Thread ClusterState
    zkState bytesReceived bytesSent zkConnections latencyAvg latencyMin
    latencyMax znodeCount outstandingRequests packetsReceived packetsSent
)
# echo ${#keys[@]} shows how many metrics there are (cpname not included)

# Metric name -> collected value; every metric starts out as an empty string.
declare -A map=()
for metric_name in "${keys[@]}"; do
    map["$metric_name"]=""
done

# Print the status message that belongs to a result code and terminate the script.
# Arguments:
#   $1 - result code: 0 = all metrics back to normal, 3 = PID could not be
#        determined, 4 = the process is a zombie; other codes print nothing
# Globals:
#   DataResultStr (read) - appended to the "recovered" message
#                          (NOTE(review): not assigned anywhere in the visible script)
# Exits with $1, except for code 4, which exits with status 3.
function gotErr() {
    local status="$1"

    # The zombie case is the only one whose exit status differs from its code.
    if [ "$status" -eq 4 ]; then
        echo "该组件进程为僵尸进程,请确认并请检查该组件状态"
        exit 3
    fi

    if [ "$status" -eq 0 ]; then
        echo "mailstatedes=各项指标恢复正常 statedes=各项指标恢复正常|$DataResultStr"
    elif [ "$status" -eq 3 ]; then
        echo "无法获取到组件PID,疑似组件故障,请确认并请检查监控脚本和运维平台配置"
    fi

    # Left unquoted on purpose: with an empty argument this is a bare `exit`.
    exit $status
}

# Record the memory usage of the monitored process.
# Column 4 of `ps aux` is %MEM; the values of the rows belonging to the PID are summed.
# Globals:
#   FLAGEID (read)                        - PID of the monitored process
#   CURNUM (written)                      - the %MEM total
#   zookeeper_acquisition_json (appended) - gets ,'Memory':<value>
#   map["Memory"] (written)
function getMemory(){
    # Compare the PID column exactly. Filtering with `grep $FLAGEID` also picked up
    # PIDs that merely contain the same digits, command lines mentioning the number,
    # and the grep process itself, so unrelated processes were added to the total.
    CURNUM=$(ps aux | awk -v pid="$FLAGEID" 'BEGIN { sum = 0 } $2 == pid { sum += $4 } END { print sum }')
    zookeeper_acquisition_json="$zookeeper_acquisition_json,'Memory':$CURNUM"
    map["Memory"]="$CURNUM"
}


# Record the CPU usage of the monitored process.
# Column 3 of `ps aux` is %CPU; the values of the rows belonging to the PID are
# summed and, when CORENUM is non-zero, divided by CORENUM and printed with two
# decimals. With CORENUM equal to 0 (or not numeric) the raw sum is reported.
# Globals:
#   FLAGEID (read)                        - PID of the monitored process
#   CORENUM (read)                        - divisor for the summed percentage
#   CURNUM (written)                      - the resulting CPU figure
#   zookeeper_acquisition_json (appended) - gets ,'CPU':<value>
#   map["CPU"] (written)
function getCPU(){
    # Exact PID-column match: `grep $FLAGEID` also matched longer PIDs, command
    # lines containing the number and the grep process itself.
    # The division is done inside awk, so `bc` is no longer required; note that
    # awk rounds to two decimals where `bc` with scale=2 truncated.
    CURNUM=$(ps aux | awk -v pid="$FLAGEID" -v cores="$CORENUM" '
        BEGIN { sum = 0 }
        $2 == pid { sum += $3 }
        END {
            if (cores + 0 != 0) {
                printf "%.2f", sum / cores
            } else {
                print sum
            }
        }')
    zookeeper_acquisition_json="$zookeeper_acquisition_json,'CPU':$CURNUM"
    map["CPU"]="$CURNUM"
}

# Record how long the monitored process has been running.
# `ps -eo pid,etime` prints each PID with its elapsed time; the row whose PID
# equals FLAGEID supplies the value.
# Globals: FLAGEID (read); CURNUM, map["RUNTIME"] (written);
#          zookeeper_acquisition_json (appended with ,'RUNTIME':'<value>')
function getRuntime(){
    local elapsed
    elapsed=$(ps -eo pid,etime | awk -v pid="$FLAGEID" '$1 == pid { print $2 }')
    CURNUM="$elapsed"
    zookeeper_acquisition_json="${zookeeper_acquisition_json},'RUNTIME':'${CURNUM}'"
    map["RUNTIME"]="$CURNUM"
}

# Record when the monitored process was started.
# `ps -eo pid,lstart` prints each PID followed by its start timestamp; fields 2-6
# of the row whose PID equals FLAGEID are joined with single spaces.
# Globals: FLAGEID (read); CURNUM, map["STARTTIME"] (written);
#          zookeeper_acquisition_json (appended with ,'STARTTIME':'<value>')
function getStarttime(){
    local started_at
    started_at=$(ps -eo pid,lstart | awk -v pid="$FLAGEID" '$1 == pid { print $2, $3, $4, $5, $6 }')
    CURNUM="$started_at"
    zookeeper_acquisition_json="${zookeeper_acquisition_json},'STARTTIME':'${CURNUM}'"
    map["STARTTIME"]="$CURNUM"
}

# Count the ESTABLISHED connections owned by the monitored process.
# Globals:
#   FLAGEID (read)   - PID of the monitored process
#   CURNUM (written) - number of matching connections
# NOTE(review): netstat is run through sudo, presumably so the PID/Program column
# is filled in for sockets of other users — confirm against the deployment.
function getConnNum(){
    # Column 6 is the connection state and column 7 is "PID/Program". Comparing
    # those columns avoids the false hits of `grep -w $FLAGEID`, which also
    # matched any line where the same number shows up as a port.
    CURNUM=$(sudo netstat -apn | awk -v pid="$FLAGEID" '
        pid != "" && $6 == "ESTABLISHED" {
            split($7, owner, "/")
            if (owner[1] == pid) {
                count++
            }
        }
        END { print count + 0 }')
}

# Count the threads of the monitored process.
# The number of lines printed by `ps -mp <pid>` minus two is stored in CURNUM
# (presumably the two extra lines are the header and the process summary row).
# Globals: FLAGEID (read); CURNUM (written)
function getThreadNum(){
    local ps_lines
    ps_lines=$(ps -mp "$FLAGEID" | wc -l)
    CURNUM=$((ps_lines - 2))
}

# Cluster state.
# Runs zookeeper_monitor.jar (expected next to this script) against ServerAddr and
# inspects its output, which is split on commas and whitespace into entries; the
# text before the first "-" of each entry is treated as that entry's state code.
# Globals:
#   ServerAddr (read)                    - cluster address handed to the jar
#   zookeeperMonitor, monitorDate (written) - jar path and its raw output
#   CURNUM (written)                     - 2 if any entry has state code 2, else 0
#   ErrorResultStr (appended)            - gets the raw output when a 2 is found
function getClusterState(){
    local script_dir entry

    # Quoted throughout so a script path containing spaces still works.
    script_dir=$(cd -- "$(dirname "$0")" && pwd)
    zookeeperMonitor="${script_dir}/zookeeper_monitor.jar"
    # stderr of the jar is discarded, as before.
    monitorDate=$(java -jar "$zookeeperMonitor" "$ServerAddr" 2>/dev/null)

    # Read the entries one per line; nothing is word-split or glob-expanded by the
    # shell, and the state code is compared as a literal string (the previous
    # unquoted `[[ 2 == ${i} ]]` treated the entry as a glob pattern).
    while IFS= read -r entry; do
        if [[ "${entry%%-*}" == "2" ]]; then
            CURNUM=2
            ErrorResultStr="${ErrorResultStr} ${monitorDate}"
            return
        fi
    done < <(printf '%s\n' "$monitorDate" | tr ', \t' '\n\n\n')
    CURNUM=0
}

# Fetch the status overview: send the "mntr" command to IP:PORT with nc and
# store the reply in mntr.txt (relative to the current directory).
# Globals: IP, PORT (read)
function getMntr(){
    printf '%s\n' "mntr" | nc "$IP" "$PORT" > mntr.txt
}

# Leader/follower role change.
# Compares the zk_server_state value found in mntr.txt with the role remembered
# from the previous run and stores the result in CURNUM:
#   0 - the role did not change (also used when either value is unknown)
#   2 - the node changed from leader to follower
#   3 - the node changed from follower to leader
# Globals:
#   ZK_STATE_FILE (read, optional) - file remembering the last role;
#                                    defaults to /root/zkState.txt
#   CURNUM (written)
function getzkState(){
    local state_file="${ZK_STATE_FILE:-/root/zkState.txt}"
    local current="" previous=""

    current=$(awk '/zk_server_state/ { print $NF }' mntr.txt 2>/dev/null)

    # On the very first run there is no state file yet; treat that as "unknown".
    if [[ -r "$state_file" ]]; then
        previous=$(<"$state_file")
    fi

    # Only remember a role that was actually read. Storing an empty value would
    # erase the last known role, so a change that happens while the server is
    # unreachable would never be reported.
    if [[ -n "$current" ]]; then
        printf '%s\n' "$current" > "$state_file"
    fi

    if [[ -z "$current" || -z "$previous" || "$current" == "$previous" ]]; then
        CURNUM=0
    elif [[ "$current" == "follower" ]]; then
        CURNUM=2
    else
        CURNUM=3
    fi
}

# Total amount of data received from clients.
# Sends the "cons" command to IP:PORT, adds up every recved=<n> counter in the
# reply and stores the total, divided by 1024*1024 and rounded to an integer,
# in CURNUM (0 when nothing could be read).
# Globals: IP, PORT (read); CURNUM (written)
function getBytesReceived(){
    # Sum and convert in a single awk program. Printing the intermediate sum
    # with `print` and re-parsing it in a second awk can lose precision on awk
    # implementations that print large sums in %.6g notation (e.g. mawk).
    CURNUM=$(printf '%s\n' "cons" | nc "$IP" "$PORT" \
        | grep -o 'recved=[0-9][0-9]*' \
        | awk -F= '{ total += $2 } END { printf "%.0f\n", total / 1024 / 1024 }')
    if [[ -z "$CURNUM" ]]; then
        CURNUM=0
    fi
}

# Total amount of data sent to clients.
# Sends the "cons" command to IP:PORT, adds up every sent=<n> counter in the
# reply and stores the total, divided by 1024*1024 and rounded to an integer,
# in CURNUM (0 when nothing could be read).
# Globals: IP, PORT (read); CURNUM (written)
function getBytesSent(){
    # Sum and convert in a single awk program. Printing the intermediate sum
    # with `print` and re-parsing it in a second awk can lose precision on awk
    # implementations that print large sums in %.6g notation (e.g. mawk).
    CURNUM=$(printf '%s\n' "cons" | nc "$IP" "$PORT" \
        | grep -o 'sent=[0-9][0-9]*' \
        | awk -F= '{ total += $2 } END { printf "%.0f\n", total / 1024 / 1024 }')
    if [[ -z "$CURNUM" ]]; then
        CURNUM=0
    fi
}

# Total number of client connections.
# Reads the zk_num_alive_connections line of mntr.txt into CURNUM
# (0 when the file or the key is missing).
# Globals: CURNUM (written)
function getzkConnections(){
    # Match the key exactly and clean only the value field. A substring grep plus
    # digit-stripping over the whole line returns several lines (or wrong digits)
    # as soon as another key contains this name.
    CURNUM=$(awk '$1 == "zk_num_alive_connections" { gsub(/[^0-9.]/, "", $2); print $2; exit }' mntr.txt 2>/dev/null)
    if [[ -z "$CURNUM" ]]; then
        CURNUM=0
    fi
}

# Average time the server takes to answer client requests.
# Reads the zk_avg_latency line of mntr.txt into CURNUM
# (0 when the file or the key is missing).
# Globals: CURNUM (written)
function getlatencyAvg(){
    # Match the key exactly and clean only the value field. A substring grep plus
    # digit-stripping over the whole line returns several lines (or wrong digits)
    # as soon as another key contains this name.
    CURNUM=$(awk '$1 == "zk_avg_latency" { gsub(/[^0-9.]/, "", $2); print $2; exit }' mntr.txt 2>/dev/null)
    if [[ -z "$CURNUM" ]]; then
        CURNUM=0
    fi
}

# Maximum time the server took to answer a client request.
# Reads the zk_max_latency line of mntr.txt into CURNUM
# (0 when the file or the key is missing).
# Globals: CURNUM (written)
function getlatencyMax(){
    # Match the key exactly and clean only the value field. A substring grep plus
    # digit-stripping over the whole line returns several lines (or wrong digits)
    # as soon as another key contains this name.
    CURNUM=$(awk '$1 == "zk_max_latency" { gsub(/[^0-9.]/, "", $2); print $2; exit }' mntr.txt 2>/dev/null)
    if [[ -z "$CURNUM" ]]; then
        CURNUM=0
    fi
}

# Minimum time the server took to answer a client request.
# Reads the zk_min_latency line of mntr.txt into CURNUM
# (0 when the file or the key is missing).
# Globals: CURNUM (written)
function getlatencyMin(){
    # Match the key exactly and clean only the value field. A substring grep plus
    # digit-stripping over the whole line returns several lines (or wrong digits)
    # as soon as another key contains this name.
    CURNUM=$(awk '$1 == "zk_min_latency" { gsub(/[^0-9.]/, "", $2); print $2; exit }' mntr.txt 2>/dev/null)
    if [[ -z "$CURNUM" ]]; then
        CURNUM=0
    fi
}

# Number of znodes.
# Reads the zk_znode_count line of mntr.txt into CURNUM
# (0 when the file or the key is missing).
# Globals: CURNUM (written)
function getznodeCount(){
    # Match the key exactly and clean only the value field. A substring grep plus
    # digit-stripping over the whole line returns several lines (or wrong digits)
    # as soon as another key contains this name.
    CURNUM=$(awk '$1 == "zk_znode_count" { gsub(/[^0-9.]/, "", $2); print $2; exit }' mntr.txt 2>/dev/null)
    if [[ -z "$CURNUM" ]]; then
        CURNUM=0
    fi
}

# Number of queued requests that exceed what the server can currently process.
# Reads the zk_outstanding_requests line of mntr.txt into CURNUM
# (0 when the file or the key is missing).
# Globals: CURNUM (written)
function getoutstandingRequests(){
    # Match the key exactly and clean only the value field. A substring grep plus
    # digit-stripping over the whole line returns several lines (or wrong digits)
    # as soon as another key contains this name.
    CURNUM=$(awk '$1 == "zk_outstanding_requests" { gsub(/[^0-9.]/, "", $2); print $2; exit }' mntr.txt 2>/dev/null)
    if [[ -z "$CURNUM" ]]; then
        CURNUM=0
    fi
}

# Number of packets received.
# Reads the zk_packets_received line of mntr.txt into CURNUM
# (0 when the file or the key is missing).
# Globals: CURNUM (written)
function getpacketsReceived(){
    # Match the key exactly and clean only the value field. A substring grep plus
    # digit-stripping over the whole line returns several lines (or wrong digits)
    # as soon as another key contains this name.
    CURNUM=$(awk '$1 == "zk_packets_received" { gsub(/[^0-9.]/, "", $2); print $2; exit }' mntr.txt 2>/dev/null)
    if [[ -z "$CURNUM" ]]; then
        CURNUM=0
    fi
}

# Number of packets sent.
# Reads the zk_packets_sent line of mntr.txt into CURNUM
# (0 when the file or the key is missing).
# Globals: CURNUM (written)
function getpacketsSent(){
    # Match the key exactly and clean only the value field. A substring grep plus
    # digit-stripping over the whole line returns several lines (or wrong digits)
    # as soon as another key contains this name.
    CURNUM=$(awk '$1 == "zk_packets_sent" { gsub(/[^0-9.]/, "", $2); print $2; exit }' mntr.txt 2>/dev/null)
    if [[ -z "$CURNUM" ]]; then
        CURNUM=0
    fi
}
# Check that the service is alive and remember its PID.
# Finds the process listening on PORT, aborts through gotErr when there is none
# (code 3) or when it is a zombie (code 4), and otherwise starts the metric
# string with the component name and the PID.
# Globals:
#   PORT (read)                          - port the service listens on
#   CORENUM (written)                    - number of "physical id" lines in /proc/cpuinfo
#   FLAGEID (written)                    - PID of the listening process
#   ZOMBIE (written)                     - the zombie row from ps, empty if none
#   zookeeper_acquisition_json (written) - reset to 'cpname':'zookeeper','FLAGEPID':'<pid>'
#   map["FLAGEPID"] (written)
function isAlive(){
    CORENUM=$(grep -c "physical id" /proc/cpuinfo 2>/dev/null)
    CORENUM=${CORENUM:-0}

    # Pick the LISTEN row whose local address ends in ":PORT" and take the PID from
    # the "PID/Program" column. `grep -w $PORT` also matched rows where the same
    # number appears elsewhere, e.g. as the PID of an unrelated listener.
    FLAGEID=$(netstat -lnp | awk -v port="$PORT" '
        $6 == "LISTEN" {
            n = split($4, addr, ":")
            if (addr[n] == port) {
                split($7, owner, "/")
                print owner[1]
                exit
            }
        }')

    # netstat shows "-" when it cannot tell the owner; anything that is not a
    # number cannot be used as a PID by the later checks.
    if [[ ! "$FLAGEID" =~ ^[0-9]+$ ]]; then
        gotErr 3
    fi

    # State in column 1 starts with Z for zombies; compare the PID column exactly
    # so that a zombie with a longer PID does not trigger a false alarm.
    ZOMBIE=$(ps -A -ostat,pid | awk -v pid="$FLAGEID" '$1 ~ /^[Zz]/ && $2 == pid')
    if [[ -n "$ZOMBIE" ]]; then
        gotErr 4
    fi

    zookeeper_acquisition_json="'cpname':'zookeeper','FLAGEPID':'$FLAGEID'"
    map["FLAGEPID"]="$FLAGEID"
}

# Collect every metric.
# Start time, run time, memory and CPU are always gathered (those getters record
# their own values). The cluster state is added only when ServerAddr holds a real
# address, and the remaining metrics only when IP holds a real host. For each of
# those, the getter leaves its result in CURNUM, which is then appended to
# zookeeper_acquisition_json and stored in map under the metric name.
# Globals: ServerAddr, IP, CURNUM (read); zookeeper_acquisition_json, map (written)
function analysisStat(){
    local step getter metric
    # "getter:metric" pairs in execution order; an entry without ":metric" is
    # only executed and records nothing.
    local -a plan=(
        "getConnNum:ConnectionNum"
        "getThreadNum:Thread"
        "getMntr"
        "getzkState:zkState"
        "getBytesReceived:bytesReceived"
        "getBytesSent:bytesSent"
        "getzkConnections:zkConnections"
        "getlatencyAvg:latencyAvg"
        "getlatencyMin:latencyMin"
        "getlatencyMax:latencyMax"
        "getznodeCount:znodeCount"
        "getoutstandingRequests:outstandingRequests"
        "getpacketsReceived:packetsReceived"
        "getpacketsSent:packetsSent"
    )

    getStarttime
    getRuntime
    getMemory
    getCPU

    # Skip the cluster check when the address is empty or still the placeholder.
    if [[ -n "$ServerAddr" && "$ServerAddr" != "<<cluster_ip>>" ]]; then
        getClusterState
        zookeeper_acquisition_json+=",'ClusterState':$CURNUM"
        map["ClusterState"]="$CURNUM"
    fi

    # Without a usable host none of the remaining metrics can be collected.
    if [[ -z "$IP" || "$IP" == "<<ip>>" ]]; then
        return
    fi

    for step in "${plan[@]}"; do
        getter=${step%%:*}
        "$getter"
        [[ "$step" == *:* ]] || continue
        metric=${step#*:}
        zookeeper_acquisition_json+=",'${metric}':${CURNUM}"
        map["$metric"]="$CURNUM"
    done
}

# Print the result: one JSON-style object on a single line, holding every name
# listed in keys with its value from map (single quotes in a value are turned
# into double quotes) and a final "cpname":"zookeeper" member.
# Globals: keys, map (read)
function analysisResult(){
    local name
    printf '%b' "{"
    for name in "${keys[@]}"; do
        # %b expands backslash escapes in the argument the same way `echo -e` does.
        printf '%b' "\"${name}\":\"${map[$name]//\'/\"}\","
    done
    printf '%b' "\"cpname\":\"zookeeper\""
    printf '%b\n' "}"
}

# Print the usage text and terminate the program with status 1.
# Arguments:
#   $1 - optional name of a missing item; when given, a line asking the user to
#        supply it is printed before the usage text
function showHelp(){
    if [[ -n "$1" ]]; then
        echo "请输入$1"
    fi
    cat <<'EOF'
check_zookeeper.sh 可以监听本地的zookeeper的状态 参数如下
check_zookeeper.sh [-p <port>]
-I 表示主机IP
-C 表示zookeeper集群服务地址
-p <port> 特征字符串 表示监控哪个zookeeper端口
EOF
    exit 1
}


# Command-line options: -I <host>, -p <port>, -C <cluster address>, -h (help).
# -h and anything getopts rejects both end in the usage text.
while getopts "I:p:C:h" arg; do
    case "$arg" in
        I) IP=$OPTARG ;;
        p) PORT=$OPTARG ;;
        C) ServerAddr=$OPTARG ;;
        h | ?) showHelp ;;
    esac
done

# The port is mandatory; without it show the usage text and stop.
if [[ -z "$PORT" ]]; then
    showHelp "特征项"
fi

# Main part of the plugin.
# First confirm the service is alive, then collect the metrics and print them.

isAlive
analysisStat
analysisResult

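# NOTE: the lines below are web-page residue (a blog comment form) that was captured
# together with the script; they are not part of it and are commented out so the
# shell does not try to run them.
# 发表回复
#
# 您的电子邮箱地址不会被公开。 必填项已用*标注
#
# Captcha Code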