#! /bin/bash
# Monitoring check for a local dubbo service, identified by the port given
# with -p. Collected values are accumulated in DataResultStr and printed by
# gotErr as a JSON-style object.
export LANG=en_US.UTF-8
FLAGE=
# PID of the monitored process; filled in by isAlive.
FLAGEID=
# Whether to output FLAGPID
FLAGEOPID=1
# Whether to output RUNTIME
RUNTIME=1
# Whether to output STARTTIME
STARTTIME=0
CONN=0              # whether to check the connection count
CONNMIN=
CONNMAX=
MEMORY=0            # whether to check memory
MEMORYMIN=
MEMORYMAX=
CPU=0               # whether to check CPU
CPUMIN=
CPUMAX=

THREADNUM=0         # whether to check the thread count (getThreadNum also stores the current count here)
THREADMIN=
THREADMAX=

THREADFLAP=0
THREADFLAPMIN=
THREADFLAPMAX=

JMX=0           # whether to use JMX performance parameters
JMXMAX=
JMXMIN=
RESTARTUSER="iflyweb"   # user the restart is run as
isExcuteRestart=0   # whether a restart is actually performed
SERVICEDIR=         # service installation directory
RESTARTTHREADLIMITMIN=
RESTARTTHREADLIMITMAX=
RESTARTTHREADFLAGLIMIT=
RETRYCOUNT=
RETRYTHREADMIN=
HistoryFlapping=0
CURNUM=             # current value of the metric being processed
CORENUM=1


WranResultStr=      # final warning string
ErrorResultStr=     # final error string
DataResultStr=      # accumulated performance-metric string

QUOTASTR=
quota=

# State/log files used by the restart logic (paths are relative to the
# working directory the script is started from).
TEMPTHREADFILE="./cmd/linux/DUBBO_HISTHREAD_"
TEMPTRYCOUNTFILE="./cmd/linux/DUBBO_RETRYCOUNT_"
TEMPTRYLOGFILE="./cmd/linux/DUBBO_LOG_"

# Print the final plugin output for a status code and terminate the script.
# Globals:
#   DataResultStr (read) - comma-separated 'key':'value' pairs
# Arguments:
#   $1 - status code:
#          2  print the collected metrics as a JSON object, exit 2
#          3  print the "metrics unavailable" message, exit 3
#          4  print the "zombie process" message, exit 3
#        any other value prints nothing and is used as the exit status
# Outputs:
#   One line on stdout (for codes 2, 3 and 4).
function gotErr(){
    local result
    local sq="'" dq='"'

    if [[ "$1" -eq 2 ]]; then
        result="{'cpname':'dubbo',$DataResultStr}"
        # Turn the single-quoted pairs into JSON double quotes. The value is
        # printed quoted so that whitespace inside metric values is kept
        # verbatim and glob characters are never expanded against the
        # working directory.
        result=${result//$sq/$dq}
        printf '%s\n' "$result"
    elif [[ "$1" -eq 3 ]]; then
        echo "无法获取到指标,疑似组件故障,请确认并请检查监控脚本和运维平台配置"
    elif [[ "$1" -eq 4 ]]; then
        echo "该组件进程为僵尸进程,请确认并请检查该组件状态"
        exit 3
    fi

    exit "$1"
}

# Check that the service is alive and record its PID.
# Globals:
#   PORT (read)      - listening port of the monitored service
#   CORENUM (write)  - number of "physical id" lines in /proc/cpuinfo
#   FLAGEID (write)  - PID of the process listening on PORT
#   ZOMBIE (write)   - ps line of the process when it is a zombie, else empty
# Exits through gotErr 3 when no listener is found and through gotErr 4 when
# the listener is a zombie process.
function isAlive(){
    local owner

    CORENUM=$(grep "physical id" /proc/cpuinfo | wc -l)

    # The 6th column of the last ss line is the owner description, e.g.
    #   users:(("java",1234,5))            (older iproute2)
    #   users:(("java",pid=1234,fd=5))     (newer iproute2)
    # Keep only the digits of the second comma-separated field so both
    # layouts yield a bare PID.
    owner=$(ss -lnpt "( sport == :$PORT )" | tail -n 1 | awk '{print $6}' | awk -F ',' '{print $2}')
    FLAGEID=${owner//[!0-9]/}
    if [[ -z "$FLAGEID" ]]; then
        gotErr 3
    fi

    # Compare the PID column exactly; a substring match would also report
    # unrelated zombies whose PID merely contains FLAGEID.
    ZOMBIE=$(ps -A -ostat,pid | awk -v pid="$FLAGEID" '$1 ~ /^[Zz]/ && $2 == pid')
    if [[ -n "$ZOMBIE" ]]; then
        gotErr 4
    fi
    #TEMPTHREADFILE=$TEMPTHREADFILE$FLAGE
    #TEMPTRYCOUNTFILE=$TEMPTRYCOUNTFILE$FLAGE
    #TEMPTRYLOGFILE=$TEMPTRYLOGFILE$FLAGE
    # Read the historical thread count to compute the flapping rate (disabled)
    #HistoryThreadNum=`cat $TEMPTHREADFILE|head -n 1`
}


# Count established connections on the service port.
# Globals: PORT (read), CURNUM (write)
# Every ss output line that does not contain "Address" (i.e. everything but
# the header) is counted.
function getConnNum(){
    local port_filter="( sport == :$PORT )"
    CURNUM=$(ss -oanp state established "$port_filter" | grep -vc "Address")
}

# Count connections on the service port that are in the close-wait state.
# Globals: PORT (read), CURNUM (write)
# The ss header line (the one containing "Address") is not counted.
function getCloseWaitConnNum(){
    local port_filter="( sport == :$PORT )"
    CURNUM=$(ss -oanp state close-wait "$port_filter" | grep -vc "Address")
}

# Report the memory share of the monitored process.
# Column 4 of `ps aux` is %MEM; it is summed over the rows whose PID column
# (column 2) equals FLAGEID.
# Globals: FLAGEID (read), CURNUM (write)
function getMemory(){
    # Match on the PID column only. Grepping the whole line for the PID would
    # also pick up other processes whose PID or command line happens to
    # contain the same digits (including the grep itself).
    CURNUM=$(ps aux | awk -v pid="$FLAGEID" '$2 == pid { sum += $4 } END { print sum + 0 }')
}

# Report the CPU usage of the monitored process, divided by CORENUM.
# Column 3 of `ps aux` is %CPU; it is summed over the rows whose PID column
# (column 2) equals FLAGEID.
# Globals: FLAGEID (read), CORENUM (read), CURNUM (write)
# When CORENUM is zero or unset the raw sum is reported unchanged.
function getCPU(){
    # Match on the PID column only; a substring grep over the whole line
    # would add the CPU of unrelated processes.
    CURNUM=$(ps aux | awk -v pid="$FLAGEID" '$2 == pid { sum += $3 } END { print sum + 0 }')
    if [[ "${CORENUM:-0}" -ne 0 ]]; then
        # awk does the division so the result does not silently become 0.00
        # on hosts without bc. Note: the value is rounded to two decimals
        # (bc with scale=2 truncated).
        CURNUM=$(awk -v total="$CURNUM" -v cores="$CORENUM" 'BEGIN { printf "%.2f", total / cores }')
    fi
}

# Record the number of lines `pstree -p` prints for the monitored process.
# Globals: FLAGEID (read), CURNUM (write), THREADNUM (write)
function getThreadNum() {
    local tree_lines
    tree_lines=$(pstree -p $FLAGEID | wc -l)
    CURNUM=$tree_lines
    THREADNUM=$tree_lines
    #echo $THREADNUM > $TEMPTHREADFILE
}

# Look up how long the monitored process has been running.
# `ps -eo pid,etime` lists the elapsed time per PID; the etime column of the
# row whose PID equals FLAGEID is stored.
# Globals: FLAGEID (read), CURNUM (write)
function getRuntime(){
    CURNUM=$(
        ps -eo pid,etime \
            | grep $FLAGEID \
            | awk '$1 == '$FLAGEID' { print $2 }'
    )
}

# Look up when the monitored process was started.
# `ps -eo pid,lstart` lists the start time per PID; columns 2-6 of the row
# whose PID equals FLAGEID are stored, joined by single spaces.
# Globals: FLAGEID (read), CURNUM (write)
function getStarttime(){
    CURNUM=$(
        ps -eo pid,lstart \
            | grep $FLAGEID \
            | awk '$1 == '$FLAGEID' { print $2, $3, $4, $5, $6 }'
    )
}

# Compute the thread-count change relative to the previous sample, in percent:
#   (THREADNUM - HistoryThreadNum) * 100 / HistoryThreadNum   (integer math)
# Globals:
#   THREADNUM, HistoryThreadNum (read)
#   HistoryFlapping (read/write), CURNUM (write)
# NOTE(review): isNumber is not defined in the visible part of the file; it is
# assumed to return status 1 when its argument is numeric - confirm.
function getThreadFlapping() {
    isNumber $HistoryThreadNum
    if [[ $? -eq 1 ]]; then
        # A zero baseline cannot be used as divisor; keep the previous
        # HistoryFlapping value instead of ending up with an empty metric.
        if [[ "$HistoryThreadNum" -ne 0 ]]; then
            HistoryFlapping=$(( (THREADNUM - HistoryThreadNum) * 100 / HistoryThreadNum ))
        fi
    fi
    CURNUM=$HistoryFlapping
}

# Append every JMX metric to the result string.
# For each metric name, the value of the variable of that name is copied into
# CURNUM and parseFileds is called with the name plus JMXMIN and JMXMAX.
# Globals: JMX* metric variables, JMXMIN, JMXMAX (read); CURNUM (write)
function appendJMXDATA() {
    local metric
    for metric in \
        JMXHeapMemoryUsage \
        JMXNonHeapMemoryUsage \
        JMXThreadCount \
        JMXPeekThreadCount \
        JMXClassLoadCount \
        JMXStartTime \
        JMXISDeadLock
    do
        CURNUM=${!metric}
        parseFileds "$metric" $JMXMIN $JMXMAX
    done
}
# Collect every metric by calling the individual getter functions and append
# each one to DataResultStr.
# Each getter leaves its value in CURNUM; the parse* call that follows appends
# it to DataResultStr under the given key, so the order of the calls below is
# the order of the keys in the final output.
function analysisStat(){

        getConnNum
        parseFileds "ConnectionNum"

        getCloseWaitConnNum
        parseFileds "CloseWaitConnNum"

        getMemory
        parseFileds "Memory"

        getCPU
        parseFileds "CPU"

        getThreadNum
        parseFileds "ThreadNum"


    #if [ "$THREADFLAP" -eq 1 ];then
    #   getThreadFlapping
#      parseFiledFlaps "ThreadFlap" $THREADFLAPMIN $THREADFLAPMAX
#  fi

        # The PID found by isAlive is reported as a metric of its own.
        CURNUM=$FLAGEID
        parseTimeFileds "FLAGEPID"

    # NOTE(review): JMX is forced to 0 right before it is tested, so the whole
    # JMX/jstat branch below never runs as the script stands.
    JMX=0
    if [ "$JMX" -eq 1 ];then
        source ./cmd/linux/JMX_Info.sh $FLAGEID
        appendJMXDATA

        # NOTE(review): the jstat values below are appended in
        # " name=value;999999;999999" form, unlike the 'key':'value' pairs
        # produced by the parse* helpers.
        # Each value is one column of the last line of `jstat -gcutil`.

        #Survivor0
        CURNUM=$(jstat -gcutil $FLAGEID |tail -n 1| awk -F ' ' '{print $1}')
        DataResultStr="$DataResultStr Survivor0=$CURNUM;999999;999999"

        #Survivor1
        CURNUM=$(jstat -gcutil $FLAGEID |tail -n 1| awk -F ' ' '{print $2}')
        DataResultStr="$DataResultStr Survivor1=$CURNUM;999999;999999"

        #EdenGen
        CURNUM=$(jstat -gcutil $FLAGEID |tail -n 1| awk -F ' ' '{print $3}')
        DataResultStr="$DataResultStr EdenGen=$CURNUM;999999;999999"

        #OldGen
        CURNUM=$(jstat -gcutil $FLAGEID |tail -n 1| awk -F ' ' '{print $4}')
        DataResultStr="$DataResultStr OldGen=$CURNUM;999999;999999"

        #PermGen
        CURNUM=$(jstat -gcutil $FLAGEID |tail -n 1| awk -F ' ' '{print $5}')
        DataResultStr="$DataResultStr PermGen=$CURNUM;999999;999999"

        # Directory for the per-PID state files used by the rate metrics.
        # NOTE(review): plain mkdir reports an error once the directory exists.
        mkdir temp_dubbo/

        # The remaining metrics are rates. Each keeps a two-line state file:
        # line 1 is the counter value of the previous run, line 2 the epoch
        # second it was taken at. The reported value is
        #   (current - previous) / (elapsed seconds / 60)
        # and is only emitted when a previous sample exists.
        # NOTE(review): the divisor is zero when two runs fall into the same
        # second.

        #YoungGC
        CURNUM=$(jstat -gcutil $FLAGEID |tail -n 1| awk -F ' ' '{print $6}')
        LASTNUM=$(head -n 1 temp_dubbo/dubbo_YoungGC_"$FLAGEID".temp)
        LASTTIME=$(tail -n 1 temp_dubbo/dubbo_YoungGC_"$FLAGEID".temp)
        CURTIME=$(date +'%s')
        if [[ "$LASTNUM" != "" && "$LASTTIME" != "" ]]; then
            CURNUM2=`awk 'BEGIN{printf "%.2f",(('$CURNUM'-'$LASTNUM')/(('$CURTIME'-'$LASTTIME')/'60'))}'`
            DataResultStr="$DataResultStr YoungGC=$CURNUM2;999999;999999"

        fi
        echo $CURNUM > temp_dubbo/dubbo_YoungGC_"$FLAGEID".temp
        echo $CURTIME >> temp_dubbo/dubbo_YoungGC_"$FLAGEID".temp       

        #YoungGCTime
        LASTNUM=
        LASTTIME=
        CURNUM=$(jstat -gcutil $FLAGEID |tail -n 1| awk -F ' ' '{print $7}')
        LASTNUM=$(head -n 1 temp_dubbo/dubbo_YoungGCTime_"$FLAGEID".temp)
        LASTTIME=$(tail -n 1 temp_dubbo/dubbo_YoungGCTime_"$FLAGEID".temp)
        CURTIME=$(date +'%s')
        if [[ "$LASTNUM" != "" && "$LASTTIME" != "" ]]; then
            CURNUM2=`awk 'BEGIN{printf "%.2f",(('$CURNUM'-'$LASTNUM')/(('$CURTIME'-'$LASTTIME')/'60'))}'`
            DataResultStr="$DataResultStr YoungGCTime=$CURNUM2;999999;999999"

        fi
        echo $CURNUM > temp_dubbo/dubbo_YoungGCTime_"$FLAGEID".temp
        echo $CURTIME >> temp_dubbo/dubbo_YoungGCTime_"$FLAGEID".temp   


        #FullGC
        LASTNUM=
        LASTTIME=
        CURNUM=$(jstat -gcutil $FLAGEID |tail -n 1| awk -F ' ' '{print $8}')
        LASTNUM=$(head -n 1 temp_dubbo/dubbo_FullGC_"$FLAGEID".temp)
        LASTTIME=$(tail -n 1 temp_dubbo/dubbo_FullGC_"$FLAGEID".temp)
        CURTIME=$(date +'%s')
        if [[ "$LASTNUM" != "" && "$LASTTIME" != "" ]]; then
            CURNUM2=`awk 'BEGIN{printf "%.2f",(('$CURNUM'-'$LASTNUM')/(('$CURTIME'-'$LASTTIME')/'60'))}'`
            DataResultStr="$DataResultStr FullGC=$CURNUM2;999999;999999"

        fi
        echo $CURNUM > temp_dubbo/dubbo_FullGC_"$FLAGEID".temp
        echo $CURTIME >> temp_dubbo/dubbo_FullGC_"$FLAGEID".temp    

        #FGCT
        LASTNUM=
        LASTTIME=
        CURNUM=$(jstat -gcutil $FLAGEID |tail -n 1| awk -F ' ' '{print $9}')
        LASTNUM=$(head -n 1 temp_dubbo/dubbo_FullGCTime_"$FLAGEID".temp)
        LASTTIME=$(tail -n 1 temp_dubbo/dubbo_FullGCTime_"$FLAGEID".temp)
        CURTIME=$(date +'%s')
        if [[ "$LASTNUM" != "" && "$LASTTIME" != "" ]]; then
            CURNUM2=`awk 'BEGIN{printf "%.2f",(('$CURNUM'-'$LASTNUM')/(('$CURTIME'-'$LASTTIME')/'60'))}'`
            DataResultStr="$DataResultStr FullGCTime=$CURNUM2;999999;999999"

        fi
        echo $CURNUM > temp_dubbo/dubbo_FullGCTime_"$FLAGEID".temp
        echo $CURTIME >> temp_dubbo/dubbo_FullGCTime_"$FLAGEID".temp

        #GCTime
        LASTNUM=
        LASTTIME=
        CURNUM=$(jstat -gcutil $FLAGEID |tail -n 1| awk -F ' ' '{print $10}')
        LASTNUM=$(head -n 1 temp_dubbo/dubbo_GCTime_"$FLAGEID".temp)
        LASTTIME=$(tail -n 1 temp_dubbo/dubbo_GCTime_"$FLAGEID".temp)
        CURTIME=$(date +'%s')
        if [[ "$LASTNUM" != "" && "$LASTTIME" != "" ]]; then
            CURNUM2=`awk 'BEGIN{printf "%.2f",(('$CURNUM'-'$LASTNUM')/(('$CURTIME'-'$LASTTIME')/'60'))}'`
            DataResultStr="$DataResultStr GCTime=$CURNUM2;999999;999999"
        fi
        echo $CURNUM > temp_dubbo/dubbo_GCTime_"$FLAGEID".temp
        echo $CURTIME >> temp_dubbo/dubbo_GCTime_"$FLAGEID".temp
    fi
        # Elapsed running time and start time of the process are reported last.
        getRuntime
        parseTimeFileds "RUNTIME"   

        getStarttime
        parseTimeFileds "STARTTIME" 


}
# Append the pair '<name>':'<CURNUM>' to DataResultStr, preceded by a comma
# when DataResultStr already holds earlier pairs.
# Globals:   CURNUM (read), DataResultStr (read/write)
# Arguments: $1 - key name
function parseTimeFileds(){
    DataResultStr="${DataResultStr:+${DataResultStr},}'$1':'$CURNUM'"
}

# Append the current metric to the result string.
# Globals:
#   CURNUM (read)              - current value of the metric
#   DataResultStr (read/write) - comma-separated 'key':'value' pairs
# Arguments:
#   $1 - key name
#   Further arguments (warning/error limits passed by some callers) are
#   accepted but not used.
function parseFileds(){
    local pair="'$1':'$CURNUM'"

    if [[ -z "$DataResultStr" ]]; then
        DataResultStr=$pair
    else
        DataResultStr="$DataResultStr,$pair"
    fi
}

# Append the pair '<name>':'<CURNUM>' to DataResultStr; a comma separator is
# inserted first when DataResultStr is not empty.
# Globals:   CURNUM (read), DataResultStr (read/write)
# Arguments: $1 - key name (any further arguments are ignored)
function parseFiledFlaps() {
    local separator=""

    if [[ -n "$DataResultStr" ]]; then
        separator=","
    fi
    DataResultStr+="${separator}'$1':'$CURNUM'"
}

# Analyze the result and report the status.
# Hands over to gotErr with code 2, which prints the collected metrics and
# terminates the script; nothing after this call is executed.
function analysisResult(){
    gotErr 2
}

# Print the usage text and terminate the script with status 1.
# Arguments:
#   $1 - optional name of the missing input; when given, a "please enter
#        <name>" line is printed before the usage text
# Outputs:
#   Usage text on stdout.
# The usage documents -p, the option the script actually parses and requires
# (the previous text advertised a non-existent -f option).
# The restart-related options (-d service directory, -t restart thread
# limits, -m lower thread bound, -l retry count, -a enable restart,
# -u restart user) are parsed by the script but deliberately not advertised.
function showHelp(){
    if [ "$1" != "" ];then
        echo "请输入$1"
    fi
    echo "check_dubbo.sh 可以监听本地的dubbo的状态 参数如下"
    echo "check_dubbo.sh [-p <port>]"
    echo "-p <port> 监听端口 表示监控哪个dubbo服务"
    exit 1
}

# Restart decision for the dubbo service, based on the current thread count.
#   THREADNUM >= RESTARTTHREADLIMITMAX  -> restart immediately
#   THREADNUM >= RESTARTTHREADLIMITMIN  -> record a warning mark; restart once
#                                          RETRYCOUNT marks have accumulated
#   otherwise                           -> clear the warning marks
# Globals:
#   THREADNUM, RESTARTTHREADLIMITMIN, RESTARTTHREADLIMITMAX, RETRYCOUNT,
#   HistoryThreadNum (read); WarnCount (write)
#   TEMPTRYLOGFILE (appended), TEMPTRYCOUNTFILE (appended / truncated)
# Does nothing unless all four numeric settings are positive integers. This
# covers the original "unset or zero" cases and additionally rejects malformed
# values instead of running arithmetic comparisons on them.
function serviceRestartCheck(){
    local setting threads limit_min limit_max retry_limit

    # First decide whether the restart logic applies at all.
    for setting in "$RESTARTTHREADLIMITMAX" "$THREADNUM" "$RETRYCOUNT" "$RESTARTTHREADLIMITMIN"; do
        if [[ ! "$setting" =~ ^[0-9]+$ ]] || (( 10#$setting == 0 )); then
            return 0
        fi
    done
    # Force base 10 so values with leading zeros are not read as octal.
    threads=$((10#$THREADNUM))
    limit_min=$((10#$RESTARTTHREADLIMITMIN))
    limit_max=$((10#$RESTARTTHREADLIMITMAX))
    retry_limit=$((10#$RETRYCOUNT))

    # Record the time and the observed thread counts.
    {
        echo ""
        date
        echo "CurrentThread: ${THREADNUM}       HistoryThread: ${HistoryThreadNum}"
    } >> "$TEMPTRYLOGFILE"

    if (( threads >= limit_max )); then
        # At or above the upper limit: restart right away.
        echo "CRITICAL!!!" >> "$TEMPTRYLOGFILE"
        restartService
    elif (( threads >= limit_min )); then
        echo "WARNING!!!" >> "$TEMPTRYLOGFILE"
        echo 1 >> "$TEMPTRYCOUNTFILE"
        # Each line containing "1" in the count file is one warning mark.
        WarnCount=$(grep -c 1 "$TEMPTRYCOUNTFILE")
        if (( WarnCount >= retry_limit )); then
            restartService
        fi
    else
        echo "SAFE!!!" >> "$TEMPTRYLOGFILE"
        echo > "$TEMPTRYCOUNTFILE"
    fi
}
# Restart the service through $SERVICEDIR/bin/restart.sh, run as RESTARTUSER,
# and record the outcome. Does nothing unless isExcuteRestart is 1.
# Globals:
#   isExcuteRestart, RESTARTUSER, SERVICEDIR, FLAGEID (read)
#   RestartLog, IsSuccess (write); quota, QUOTASTR (read/write)
#   TEMPTRYCOUNTFILE (truncated), TEMPTRYLOGFILE (appended)
# Outputs:
#   Prints the value of quota on stdout.
#   NOTE(review): this looks like leftover debug output - confirm before
#   removing, the monitoring platform may see it.
function restartService(){
    local restart_cmd

    if [[ "$isExcuteRestart" -ne 1 ]]; then
        return 0
    fi

    echo > "$TEMPTRYCOUNTFILE"
    echo "CLEAN RESTARTFLAG" >> "$TEMPTRYLOGFILE"
    echo "restart service with port: $FLAGEID" >> "$TEMPTRYLOGFILE"

    # Restart the dubbo service.
    if [[ -n "$SERVICEDIR" ]]; then
        # su hands the -c string to the user's shell, so the path is
        # shell-quoted to survive spaces and other special characters.
        printf -v restart_cmd '%q' "$SERVICEDIR/bin/restart.sh"
        RestartLog=$(su - "$RESTARTUSER" -c "$restart_cmd")
    else
        # Without a service directory the command would resolve to
        # /bin/restart.sh; skip it and report the restart as failed.
        RestartLog="SERVICEDIR is not set, restart skipped"
    fi

    # RestartLog is expanded unquoted on purpose: the script output is
    # flattened to a single line before it is logged and matched.
    # shellcheck disable=SC2086
    echo $RestartLog >> "$TEMPTRYLOGFILE"
    echo "" >> "$TEMPTRYLOGFILE"
    # shellcheck disable=SC2086
    IsSuccess=$(echo $RestartLog | grep -c "Starting the isservice-provider ....OK")

    # Build the alert information.
    if [[ -n "${quota}" ]]; then
        quota="${quota},"
    fi
    echo ${quota}
    if [[ "$IsSuccess" -gt 0 ]]; then
        QUOTASTR="${QUOTASTR}重启状态=成功 "
    else
        QUOTASTR="${QUOTASTR}重启状态=失败 "
    fi
}

# Split the "-t" argument "<min>,<max>" into the two restart thread limits.
# Arguments:
#   $1 - comma-separated pair; every non-digit character in a field is dropped
# Globals:
#   RESTARTTHREADLIMITMIN (write) - digits of the first field
#   RESTARTTHREADLIMITMAX (write) - digits of the second field (empty when
#                                   there is no second field)
# The argument is handled with parameter expansion only, so it is never
# word-split or glob-expanded against the working directory.
function parseRestartArgs(){
    local spec=$1
    local min_field max_field=""

    min_field=${spec%%,*}
    if [[ "$spec" == *,* ]]; then
        max_field=${spec#*,}
        max_field=${max_field%%,*}
    fi

    RESTARTTHREADLIMITMIN=${min_field//[!0-9]/}
    RESTARTTHREADLIMITMAX=${max_field//[!0-9]/}
}

# Command-line handling.
#   -p <port>  port of the monitored service (required)
#   -d <dir>   service directory
#   -t <spec>  restart thread limits, handed to parseRestartArgs
#   -l <n>     retry count
#   -m <n>     stored in RETRYTHREADMIN
#   -a <arg>   enable the restart logic (getopts expects an argument here)
#   -u <user>  user the restart is run as
#   -h         show the help text
# Any unknown option shows the help text as well.
while getopts "p:d:t:l:m:a:u:h" arg; do
    case "$arg" in
        p) PORT=$OPTARG ;;
        d) SERVICEDIR=$OPTARG ;;
        t) parseRestartArgs "$OPTARG" ;;
        l) RETRYCOUNT=$OPTARG ;;
        m) RETRYTHREADMIN=$OPTARG ;;
        u) RESTARTUSER=$OPTARG ;;
        a) isExcuteRestart=1 ;;
        h | ?) showHelp ;;
    esac
done

# The port is mandatory.
if [[ -z "$PORT" ]]; then
    showHelp "特征项"
fi

# Main part of the plugin:
#   1. make sure the service is alive (isAlive exits the script otherwise)
#   2. collect the metrics
#   3. run the restart check
#   4. print the result and exit
isAlive
analysisStat
serviceRestartCheck
analysisResult

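# NOTE: the text that followed here was web-page residue from a comment form,
# not part of the script:
#   "Post a comment" / "Your email address will not be published." /
#   "Captcha Code"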