#! /bin/bash
LANG=en_US.UTF-8
export LANG

# Thresholds, currently disabled:
#WARNLIMIT=-1      # warning threshold: a value above it should raise a warning
#ERRORLIMIT=-1     # error threshold: a value above it counts as an error

# Command-line inputs (-p / -I) and the PID that isAlive resolves from the port.
PORT=""
IP=""
FLAGEID=""

# Whether to output RUNTIME
RUNTIME=1
# Whether to output STARTTIME
STARTTIME=0

# Feature switches
CONN=0              # whether to check the connection count
MEMORY=0            # whether to check memory
CPU=0               # whether to check CPU
THREADNUM=0         # whether to check threads
# Divisor applied by getCPU; isAlive overwrites it from /proc/cpuinfo.
CORENUM=1

# Most recently collected metric value; parseFileds reads it.
CURNUM=""

WranResultStr=""    # final warning string
ErrorResultStr=""   # final error string
DataResultStr=""    # accumulated metric string ('key':'value' pairs)

quota=""
QUOTASTR=""

#######################################
# Print the final report for a status code and terminate the script.
# Globals:
#   DataResultStr (read) - comma-separated 'key':'value' pairs
# Arguments:
#   $1 - status code:
#        0      print the collected metrics as one JSON-style object, exit 0
#        3      print the "metrics unavailable" message, exit 3
#        4      print the "zombie process" message, exit 3
#        other  print nothing, exit with that value
# Outputs:
#   The report or the message, on stdout.
#######################################
function gotErr(){
    local code=$1
    local result

    if [ "$code" -eq 0 ];then
        result="{'cpname':'elasticsearch',$DataResultStr}"
        # Swap single quotes for double quotes. A plain assignment is neither
        # word-split nor glob-expanded, so values that contain runs of spaces
        # or characters such as '*' are emitted exactly as collected.
        result=${result//\'/\"}
        printf '%s\n' "$result"
    elif [ "$code" -eq 3 ];then
        echo "无法获取到指标,疑似组件故障,请确认并请检查监控脚本和运维平台配置"
    elif [ "$code" -eq 4 ];then
        echo "该组件进程为僵尸进程,请确认并请检查该组件状态"
        # The zombie case is reported with exit status 3, not 4.
        exit 3
    fi

    exit "$code"
}

#######################################
# Check that the monitored service is alive and record its PID.
# Globals:
#   PORT    (read)    - listening port used to find the process
#   CORENUM (written) - number of "physical id" lines in /proc/cpuinfo
#   FLAGEID (written) - PID of the java process listening on PORT
#   CURNUM  (written) - set to the PID before it is reported
# Calls:
#   gotErr 3 when no java listener is found for PORT,
#   gotErr 4 when that PID is in zombie state,
#   parseFileds "FLAGEPID" otherwise.
#######################################
function isAlive(){
    # 0 when the file has no such lines or cannot be read.
    CORENUM=$(grep -c "physical id" /proc/cpuinfo 2>/dev/null)
    CORENUM=${CORENUM:-0}

    # Last netstat column is "PID/Program name"; keep the PID of the first hit.
    FLAGEID=$(netstat -lnp | grep java | grep -w -- "$PORT" | awk '{print $NF}' | awk -F '/' '{print $1}' | head -n 1)
    if [[ "$FLAGEID" == "" ]];then
        gotErr 3
    fi

    # Compare the PID column exactly: a substring match would report a zombie
    # with PID 123 as if it were the monitored PID 12.
    local zombie
    zombie=$(ps -A -ostat,pid | awk -v pid="$FLAGEID" '$1 ~ /^[Zz]/ && $2 == pid')
    if [[ "$zombie" != "" ]];then
       gotErr 4
    fi

    CURNUM=$FLAGEID
    parseFileds "FLAGEPID"
}


#######################################
# Count the ESTABLISHED connections owned by the monitored process.
# Globals:
#   FLAGEID (read)    - PID of the monitored process
#   CURNUM  (written) - number of matching connections
#######################################
function getConnNum(){
    # Column 6 is the state and column 7 is "PID/Program name" on netstat's
    # tcp/udp lines. The PID is compared exactly so that ports, addresses or
    # other PIDs that merely contain the same digits are not counted.
    CURNUM=$(netstat -apn | awk -v pid="$FLAGEID" '
        $6 == "ESTABLISHED" {
            split($7, owner, "/")
            if (owner[1] == pid) count++
        }
        END { print count + 0 }')
}

#######################################
# Read the memory share of the monitored process.
# Column 4 of `ps aux` is %MEM; column 2 is the PID.
# Globals:
#   FLAGEID (read)    - PID of the monitored process
#   CURNUM  (written) - summed %MEM of the rows whose PID equals FLAGEID
#######################################
function getMemory(){
    # Match the PID column exactly; grepping whole lines would also add up
    # unrelated processes (and the grep itself) that contain the same digits.
    CURNUM=$(ps aux | awk -v pid="$FLAGEID" '$2 == pid { sum += $4 } END { print sum + 0 }')
}

#######################################
# Read the CPU share of the monitored process, normalised by CORENUM.
# Column 3 of `ps aux` is %CPU; column 2 is the PID.
# Globals:
#   FLAGEID (read)    - PID of the monitored process
#   CORENUM (read)    - divisor; when it is 0 (or not a number) the raw sum
#                       is reported without division
#   CURNUM  (written) - %CPU / CORENUM rounded to two decimals, or the raw sum
#######################################
function getCPU(){
    # The PID column is compared exactly so other processes whose line merely
    # contains the same digits are not added. The division is done inside awk,
    # which removes the dependency on bc being installed.
    CURNUM=$(ps aux | awk -v pid="$FLAGEID" -v cores="$CORENUM" '
        $2 == pid { sum += $3 }
        END {
            if (cores + 0 != 0)
                printf "%.2f\n", sum / cores
            else
                print sum + 0
        }')
}

#######################################
# Count the threads of the monitored process.
# Globals:
#   FLAGEID (read)    - PID of the monitored process
#   CURNUM  (written) - number of lines printed by `ps -mp PID`, minus 2
#                       (presumably the header and the process summary line)
#######################################
function getThreadNum(){
    CURNUM=$(ps -mp "$FLAGEID" | awk 'END { print NR - 2 }')
}

#######################################
# Look up how long the monitored process has been running.
# `ps -eo pid,etime` lists every PID with its elapsed time.
# Globals:
#   FLAGEID (read)    - PID of the monitored process
#   CURNUM  (written) - second column of the row whose PID equals FLAGEID
#######################################
function getRuntime(){
    CURNUM=$(ps -eo pid,etime | awk -v pid="$FLAGEID" '$1 == pid { print $2 }')
}

#######################################
# Look up when the monitored process was started.
# `ps -eo pid,lstart` lists every PID with its start timestamp.
# Globals:
#   FLAGEID (read)    - PID of the monitored process
#   CURNUM  (written) - columns 2-6 of the row whose PID equals FLAGEID,
#                       joined with single spaces
#######################################
function getStarttime(){
    CURNUM=$(ps -eo pid,lstart | awk -v pid="$FLAGEID" '$1 == pid { printf "%s %s %s %s %s\n", $2, $3, $4, $5, $6 }')
}

#######################################
# Fetch /_cluster/stats and store it, flattened, in clusterStats.txt.
# The response has its braces removed and is split at every comma, so the
# file holds one comma-separated piece of the original text per line.
# Globals:
#   IP, PORT     (read)    - address of the Elasticsearch HTTP endpoint
#   clusterStats (written) - the flattened text
# Outputs:
#   Overwrites clusterStats.txt in the current directory.
#######################################
function getclusterStats(){
    local url="http://${IP}:${PORT}/_cluster/stats"

    clusterStats=$(curl "$url" 2>/dev/null \
        | sed -e 's/[{}]//g' \
        | awk -F ',' '{ for (i = 1; i <= NF; i++) print $i }')

    [[ -f clusterStats.txt ]] || touch clusterStats.txt
    echo "$clusterStats" >clusterStats.txt
}

#######################################
# Fetch /_nodes/stats and store it, flattened, in nodesStats.txt.
# The response has its braces removed and is split at every comma, so the
# file holds one comma-separated piece of the original text per line.
# Globals:
#   IP, PORT   (read)    - address of the Elasticsearch HTTP endpoint
#   nodesStats (written) - the flattened text
# Outputs:
#   Overwrites nodesStats.txt in the current directory.
#######################################
function get_node_stats(){
    local url="http://${IP}:${PORT}/_nodes/stats"

    nodesStats=$(curl "$url" 2>/dev/null \
        | sed -e 's/[{}]//g' \
        | awk -F ',' '{ for (i = 1; i <= NF; i++) print $i }')

    [[ -f nodesStats.txt ]] || touch nodesStats.txt
    echo "$nodesStats" >nodesStats.txt
}

# Print the INDEX-th (1-based, default 1) number that PATTERN captures in
# FILE, or 0 when there is no such match (or FILE cannot be read).
#   $1 - file, $2 - grep basic-regex pattern ending in the digits, $3 - index
function _esNthNumber(){
    local file=$1 pattern=$2 index=${3:-1}
    local value
    # grep -o yields one match per line; sed keeps only digits and dots.
    value=$(grep -o -- "$pattern" "$file" 2>/dev/null | sed 's/[^0-9.]//g' | awk -v k="$index" 'NR == k')
    printf '%s\n' "${value:-0}"
}

# Put one Elasticsearch metric into CURNUM and report it through parseFileds.
#   $1 - metric key, $2 - "raw" (as found) or "sec" (milliseconds converted
#   to seconds with two decimals), $3 - file, $4 - pattern, $5 - match index
function _esEmit(){
    local key=$1 unit=$2 file=$3 pattern=$4 index=${5:-1}
    CURNUM=$(_esNthNumber "$file" "$pattern" "$index")
    if [[ "$unit" == "sec" ]];then
        CURNUM=$(awk -v ms="$CURNUM" 'BEGIN { printf "%.2f\n", ms / 1000 }')
    fi
    parseFileds "$key"
}

#######################################
# Collect every metric and report each one through parseFileds.
# Process metrics (connections, memory, CPU, threads, run time, start time)
# come from the get* helpers. Elasticsearch metrics are extracted from the
# flattened clusterStats.txt / nodesStats.txt files written by
# getclusterStats and get_node_stats.
# Globals:
#   IP     (read)    - when empty or the literal "<<ip>>", the Elasticsearch
#                      metrics are reported with empty values and nothing
#                      is fetched
#   CURNUM (written) - value of the metric currently being reported
# Notes:
#   A metric that cannot be found is reported as 0 (0.00 for the time
#   metrics). An empty cluster status is reported as 0, "yellow" as 2,
#   anything else as 4. Where a key occurs several times in the response, a
#   fixed match position is used (for example the 2nd "evictions" for the
#   filter cache, the 3rd "total_time_in_millis" for flush).
#######################################
function analysisStat(){
    getConnNum
    parseFileds "ConnectionNum"

    getMemory
    parseFileds "Memory"

    getCPU
    parseFileds "CPU"

    getThreadNum
    parseFileds "Thread"

    getRuntime
    parseFileds "RUNTIME"

    getStarttime
    parseFileds "STARTTIME"

    if [[ "$IP" == "" || "$IP" == "<<ip>>" ]];then
        # No usable address: keep every key in the report, with empty values.
        local key
        CURNUM=""
        for key in \
            totalShards primariesShards filteEvictions filterSize \
            clusterStatus docsCount docsDelete fielddataEvictions \
            fielddataSize flushToal flushToalTime indexingDeleteCurrent \
            indexingDeleteTime indexingDeleteTotal indexingIndexCurrent \
            indexingIndexTime indexingIndexTotal \
            jvmGcCollectorsOldCollectionTime jvmGcCollectorsOldCount \
            jvmGcCollectorsYoungCollectionTime jvmGcCollectorsYoungCount \
            jvmMemHeapCommitted jvmMemHeapMax jvmMemHeapUsed \
            jvmMemNonHeapCommitted jvmMemNonHeapUsed \
            jvmThreadsCount jvmThreadsPeakCount
        do
            parseFileds "$key"
        done
        return
    fi

    # Refresh the two snapshot files.
    getclusterStats
    get_node_stats

    local cluster=clusterStats.txt nodes=nodesStats.txt
    local states

    # --- cluster stats ---
    _esEmit "totalShards"     raw "$cluster" '"shards":"total":[0-9]\+'
    _esEmit "primariesShards" raw "$cluster" '"primaries":[0-9]\+'
    _esEmit "filteEvictions"  raw "$cluster" '"evictions":[0-9]\+' 2
    _esEmit "filterSize"      raw "$cluster" '"filter_cache":"memory_size_in_bytes":[0-9]\+'

    states=$(grep -w '"status":' "$cluster" 2>/dev/null | awk -F '[":]+' '{print $3}')
    case "$states" in
        green|"") CURNUM=0 ;;
        yellow)   CURNUM=2 ;;
        *)        CURNUM=4 ;;
    esac
    parseFileds "clusterStatus"

    # The document count appears as "docs":{"count":N,...} in the response,
    # i.e. "docs":"count":N once the braces are stripped.
    _esEmit "docsCount"          raw "$cluster" '"docs":"count":[0-9]\+'
    _esEmit "docsDelete"         raw "$cluster" '"deleted":[0-9]\+'
    _esEmit "fielddataEvictions" raw "$cluster" '"evictions":[0-9]\+'
    _esEmit "fielddataSize"      raw "$cluster" '"fielddata":"memory_size_in_bytes":[0-9]\+'

    # --- node stats ---
    _esEmit "flushToal"             raw "$nodes" '"flush":"total":[0-9]\+'
    _esEmit "flushToalTime"         sec "$nodes" '"total_time_in_millis":[0-9]\+' 3
    _esEmit "indexingDeleteCurrent" raw "$nodes" '"delete_current":[0-9]\+'
    _esEmit "indexingDeleteTime"    sec "$nodes" '"delete_time_in_millis":[0-9]\+'
    _esEmit "indexingDeleteTotal"   raw "$nodes" '"delete_total":[0-9]\+'
    _esEmit "indexingIndexCurrent"  raw "$nodes" '"index_current":[0-9]\+'
    _esEmit "indexingIndexTime"     sec "$nodes" '"index_time_in_millis":[0-9]\+'
    _esEmit "indexingIndexTotal"    raw "$nodes" '"indexing":"index_total":[0-9]\+'

    _esEmit "jvmGcCollectorsOldCollectionTime"   sec "$nodes" '"collection_time_in_millis":[0-9]\+' 2
    _esEmit "jvmGcCollectorsOldCount"            raw "$nodes" '"old":"collection_count":[0-9]\+'
    _esEmit "jvmGcCollectorsYoungCollectionTime" sec "$nodes" '"collection_time_in_millis":[0-9]\+'
    _esEmit "jvmGcCollectorsYoungCount"          raw "$nodes" '"gc":"collectors":"young":"collection_count":[0-9]\+'

    _esEmit "jvmMemHeapCommitted"    raw "$nodes" '"heap_committed_in_bytes":[0-9]\+'
    _esEmit "jvmMemHeapMax"          raw "$nodes" '"heap_max_in_bytes":[0-9]\+'
    # NOTE(review): this key is filled from heap_used_percent, not a byte count.
    _esEmit "jvmMemHeapUsed"         raw "$nodes" '"heap_used_percent":[0-9]\+'
    _esEmit "jvmMemNonHeapCommitted" raw "$nodes" '"non_heap_committed_in_bytes":[0-9]\+'
    _esEmit "jvmMemNonHeapUsed"      raw "$nodes" '"non_heap_used_in_bytes":[0-9]\+'

    _esEmit "jvmThreadsCount"     raw "$nodes" '"threads":"count":[0-9]\+'
    _esEmit "jvmThreadsPeakCount" raw "$nodes" '"peak_count":[0-9]\+'
}


#######################################
# Append one 'key':'value' pair to the result string.
# Globals:
#   CURNUM        (read)    - value of the pair
#   DataResultStr (written) - pairs collected so far; a comma is inserted
#                             before the new pair when it is not empty
# Arguments:
#   $1 - key of the pair
#######################################
function parseFileds(){
    DataResultStr="${DataResultStr:+${DataResultStr},}'$1':'$CURNUM'"
}

#######################################
# Finish the run: hand status code 0 to gotErr.
#######################################
function analysisResult(){
    local -r ok=0
    gotErr "$ok"
}

#######################################
# Print the usage text and exit with status 1.
# Arguments:
#   $1 - optional name of a missing input; when non-empty a
#        "please enter <name>" line is printed first
# Outputs:
#   The usage text, on stdout.
#######################################
function showHelp(){
    if [ "$1" != "" ];then
        echo "请输入$1"
    fi
    # The text names elasticsearch (not flume) and shows -p as mandatory,
    # because the script refuses to run without a port.
    echo "check_elasticsearch.sh 可以监听本地的elasticsearch的状态 参数如下"
    echo "check_elasticsearch.sh -p <port> [-I <ip>] [-h]"
    echo "-p <port> 特征字符串 表示监控哪个elasticsearch端口"
    echo "-I <ip> ip地址"
    echo "-h 显示帮助信息"
    exit 1
}


# Command-line options: -p <port>, -I <ip>, -h (help).
# Both -h and any option getopts rejects end up in showHelp.
while getopts "p:I:h" arg; do
    case "$arg" in
        p) PORT=$OPTARG ;;
        I) IP=$OPTARG ;;
        h|?) showHelp ;;
    esac
done

# The port is mandatory.
if [[ -z "$PORT" ]]; then
    showHelp "端口号"
fi


# Main part of the plugin:
#   1. make sure the service is alive
#   2. collect the metrics
#   3. print the result

isAlive
analysisStat
analysisResult

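# NOTE: the lines below are leftover text from the web page this script was
# copied from; they are not part of the script.
# Leave a reply
#
# Your email address will not be published. Required fields are marked *
#
# Captcha Code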