# 5fc8cc38f37b48788.jpg_fo742.png  (web-page residue: image file name, not part of the script)

#! /bin/bash
LANG=en_US.UTF-8
export LANG

# Disabled threshold settings, kept for reference:
#WARNLIMIT=-1      # warning level: a value above this should raise a warning
#ERRORLIMIT=-1     # error level: a value above this is treated as an error

FLAGE=""
FLAGEID=""
# Whether to output FLAGPID
FLAGEOPID=1
# Whether to output RUNTIME
RUNTIME=1
# Whether to output STARTTIME
STARTTIME=0
TFSHOME=""
NAMEPORT=""

# Connection count
CONN=0
CONNMIN=""
CONNMAX=""
# Memory usage
MEMORY=0
MEMORYMIN=""
MEMORYMAX=""
# CPU usage
CPU=0
CPUMIN=""
CPUMAX=""
# Space usage
USEAGE=0
USEAGEMIN=""
USEAGEMAX=""
CORENUM=1
CURNUM=""             # current value of the metric being processed

WranResultStr=""      # warning text assembled for the final report
ErrorResultStr=""     # error text assembled for the final report
DataResultStr=""      # performance-data string

quota=""
QUOTASTR=""
tfs_acquisition_json=""

# Print the status line for result code $1 and terminate the script.
#   0 - all metrics back to normal
#   1 - warnings only (uses quota, WranResultStr, QUOTASTR)
#   2 - errors, prefixed by the warnings when there are any
#   3 - metrics unavailable; when FLAGEOPID is 1 the PID is first recorded
#       through parseFileds
#   4 - zombie process; this case exits with status 3
# Every line ends with "|$DataResultStr". Apart from code 4 the exit status
# is $1 itself.
function gotErr(){
    local _text

    if [ "$1" -eq 0 ]; then
        _text='各项指标恢复正常'
        printf '%s\n' "mailstatedes=${_text} statedes=${_text}|$DataResultStr"
        exit $1
    fi

    if [ "$1" -eq 1 ]; then
        printf '%s\n' "quota=${quota} mailstatedes=${WranResultStr} statedes=${WranResultStr} ${QUOTASTR} |$DataResultStr"
        exit $1
    fi

    if [ "$1" -eq 2 ]; then
        # Warnings, if present, go in front of the errors, comma-separated.
        _text=${ErrorResultStr}
        if [[ -n "${WranResultStr}" ]]; then
            _text="${WranResultStr},${ErrorResultStr}"
        fi
        printf '%s\n' "quota=${quota} mailstatedes=${_text} statedes=${_text} ${QUOTASTR}|$DataResultStr"
        exit $1
    fi

    if [ "$1" -eq 3 ]; then
        if [ "$FLAGEOPID" -eq 1 ]; then
            CURNUM=$FLAGEID
            parseFileds "FLAGEPID" $CURNUM $CURNUM
        fi
        _text='无法获取到指标,疑似组件故障,请确认并请检查监控脚本和运维平台配置'
        printf '%s\n' "mailstatedes=${_text} statedes=${_text}|$DataResultStr"
        exit $1
    fi

    if [ "$1" -eq 4 ]; then
        _text='该组件进程为僵尸进程,请确认并请检查该组件状态'
        printf '%s\n' "mailstatedes=${_text} statedes=${_text}|$DataResultStr"
        exit 3
    fi

    exit $1
}

# Record basic facts about the nameserver process living under $TFSHOME.
# Globals read:    TFSHOME
# Globals written:
#   CORENUM - number of "physical id" lines in /proc/cpuinfo (0 when there
#             are none or the file cannot be read)
#   FLAGEID - PID of the first `ps aux` line that contains
#             "$TFSHOME/nameserver" (empty when no such process exists)
#   ZOMBIE  - the `ps -A -ostat,pid` line of that PID when its state starts
#             with Z/z, otherwise empty
# The function only records values: the liveness (gotErr 3) and zombie
# (gotErr 4) reactions that used to sit here are disabled.
function isAlive(){
    CORENUM=$(grep -c "physical id" /proc/cpuinfo)
    CORENUM=${CORENUM:-0}

    # -F/--: the install path is a literal string, not a regex or an option.
    FLAGEID=$(ps aux \
        | grep -F -- "$TFSHOME/nameserver" \
        | grep -v grep \
        | awk '{ print $2; exit }')

    # Compare the PID column exactly; a substring match would let PID 123
    # hit the zombie entry of PID 1234. Skip the lookup when no PID was found.
    ZOMBIE=""
    if [[ -n "$FLAGEID" ]]; then
        ZOMBIE=$(ps -A -ostat,pid \
            | awk -v pid="$FLAGEID" '$1 ~ /^[Zz]/ && $2 == pid')
    fi
}


# Count the ESTABLISHED connections owned by PID $FLAGEID and append the
# result to tfs_acquisition_json as 'ConnectionNum'.
# Globals read:    FLAGEID
# Globals written: CURNUM, tfs_acquisition_json
# Matching is done on the netstat columns (6th = state, 7th = "PID/Program")
# rather than on the whole line, so a PID such as 123 no longer matches port
# numbers, addresses or other PIDs that merely contain "123".
# An empty FLAGEID yields 0.
function getConnNum(){
    CURNUM=$(netstat -apn | awk -v pid="$FLAGEID" '
        pid != "" && $6 == "ESTABLISHED" {
            split($7, owner, "/")
            if (owner[1] == pid) {
                n++
            }
        }
        END { print n + 0 }')
    tfs_acquisition_json="$tfs_acquisition_json,'ConnectionNum':$CURNUM"
}

# Sum column 4 (%MEM) of every `ps aux` line that mentions $TFSHOME and
# append the total to tfs_acquisition_json as 'Memory'.
# Globals read:    TFSHOME
# Globals written: CURNUM, tfs_acquisition_json
# The pipeline's own grep process is filtered out (as isAlive already does),
# so its %MEM is not added to the total. An empty TFSHOME yields 0.
function getMemory(){
    local _procs=""
    if [[ -n "$TFSHOME" ]]; then
        # -F/--: the install path is a literal string, not a regex or option.
        _procs=$(ps aux | grep -F -- "$TFSHOME" | grep -v grep)
    fi
    CURNUM=$(printf '%s\n' "$_procs" \
        | awk '{ total += $4 } END { print total + 0 }')
    tfs_acquisition_json="$tfs_acquisition_json,'Memory':$CURNUM"
}

# Sum column 3 (%CPU) of every `ps aux` line that mentions $TFSHOME, divide
# the total by CORENUM when CORENUM is a non-zero number, and append the
# result to tfs_acquisition_json as 'CPU'.
# Globals read:    TFSHOME, CORENUM
# Globals written: CURNUM, tfs_acquisition_json
# Notes:
#   * The pipeline's own grep process is filtered out so its %CPU is not
#     added to the total.
#   * The division is done inside awk instead of bc: without bc installed
#     the old pipeline silently reported 0.00. The quotient is formatted
#     with two decimals (rounded; bc with scale=2 truncated).
#   * When CORENUM is 0 or not numeric the raw sum is reported unchanged.
function getCPU(){
    local _procs=""
    if [[ -n "$TFSHOME" ]]; then
        # -F/--: the install path is a literal string, not a regex or option.
        _procs=$(ps aux | grep -F -- "$TFSHOME" | grep -v grep)
    fi
    CURNUM=$(printf '%s\n' "$_procs" | awk -v cores="$CORENUM" '
        { total += $3 }
        END {
            if (cores + 0 != 0) {
                printf "%.2f\n", total / cores
            } else {
                print total + 0
            }
        }')
    tfs_acquisition_json="$tfs_acquisition_json,'CPU':$CURNUM"
}
# Run "$TFSHOME/ssm -s 127.0.0.1:$NAMEPORT -i server", take the 5th field of
# the line(s) containing "TOTAL:" and append it to tfs_acquisition_json as
# 'USEAGE'.
# Globals read:    TFSHOME, NAMEPORT
# Globals written: CURNUM, tfs_acquisition_json
# `$5*100/100` forces awk to treat the field as a number, which drops any
# non-numeric suffix (e.g. "45%" becomes 45).
# NOTE(review): the 5th field is presumably the space-usage percentage -
# confirm against the ssm output format.
# The command path and address are quoted so an install directory containing
# whitespace still works.
function getUSEAGE(){
    CURNUM=$("$TFSHOME/ssm" -s "127.0.0.1:$NAMEPORT" -i server \
        | awk '/TOTAL:/ { print $5 * 100 / 100 }')
    tfs_acquisition_json="$tfs_acquisition_json,'USEAGE':$CURNUM"
}

# Look up the elapsed running time (`ps -eo pid,etime`) of PID $FLAGEID and
# append it to tfs_acquisition_json as the quoted string 'RUNTIME'.
# Globals read:    FLAGEID
# Globals written: CURNUM, tfs_acquisition_json
# The PID is handed to awk with -v instead of being pasted into the program
# text: an empty FLAGEID used to produce an awk syntax error (plus a grep
# usage message); it now simply matches nothing and leaves CURNUM empty.
function getRuntime(){
    CURNUM=$(ps -eo pid,etime \
        | awk -v pid="$FLAGEID" 'pid != "" && $1 == pid { print $2 }')
    tfs_acquisition_json="$tfs_acquisition_json,'RUNTIME':'$CURNUM'"
}

# Look up the start time (`ps -eo pid,lstart`) of PID $FLAGEID and append it
# to tfs_acquisition_json as the quoted string 'STARTTIME'.
# Globals read:    FLAGEID
# Globals written: CURNUM, tfs_acquisition_json
# Fields 2-6 of the matching line are printed separated by single spaces.
# The PID is handed to awk with -v instead of being pasted into the program
# text: an empty FLAGEID used to produce an awk syntax error (plus a grep
# usage message); it now simply matches nothing and leaves CURNUM empty.
function getStarttime(){
    CURNUM=$(ps -eo pid,lstart \
        | awk -v pid="$FLAGEID" 'pid != "" && $1 == pid { print $2, $3, $4, $5, $6 }')
    tfs_acquisition_json="$tfs_acquisition_json,'STARTTIME':'$CURNUM'"
}
# Collect every metric and print them as one JSON object on stdout.
# Each collector appends ",'<key>':<value>" to tfs_acquisition_json; the
# leading comma is stripped, the pairs are wrapped in {'cpname':'tfs',...}
# and every single quote is turned into a double quote.
# All six collectors run unconditionally. The per-metric switches
# (CONN, MEMORY, CPU, USEAGE, FLAGEOPID, RUNTIME, STARTTIME) and the
# threshold evaluation through parseFileds / parseTimeFileds are disabled.
# Globals read:    tfs_acquisition_json (after the collectors filled it)
# Globals written: result
function analysisStat(){
    local _collector

    for _collector in getConnNum getMemory getCPU getUSEAGE getRuntime getStarttime; do
        "$_collector"
    done

    result="{'cpname':'tfs',${tfs_acquisition_json#*,}}"
    # The expansions are intentionally left unquoted, as before: word
    # splitting collapses runs of whitespace in the printed JSON.
    result=$(echo ${result//\'/\"})
    echo $result
}
# Record a metric in DataResultStr without evaluating any threshold.
# Arguments: $1 - metric name, $2 / $3 - values written into the 2nd and 3rd
#            slot of the "name=value;a;b;0;0" entry
# Globals read:    CURNUM
# Globals written: val1, val2 (set to "<CURNUM> <arg>", not used further
#                  here), DataResultStr
function parseTimeFileds(){
    val1=$(printf '%s\n' "$CURNUM $2")
    val2=$(printf '%s\n' "$CURNUM $3")
    DataResultStr+=" $1=$CURNUM;$2;$3;0;0"
}

# Compare the current value against a warning and an error limit and extend
# the result strings accordingly.
# Arguments: $1 - metric name, $2 - warning limit, $3 - error limit
# Globals read:    CURNUM (current value)
# Globals written: val1, val2, DataResultStr, and on a violation
#                  WranResultStr or ErrorResultStr plus quota and QUOTASTR
# Returns: 0 when CURNUM <= warning limit,
#          1 when above the warning limit but <= the error limit,
#          2 otherwise.
function parseFileds(){
    local _detail _level

    # awk does the comparison so that non-integer values work as well.
    val1=$(printf '%s\n' "$CURNUM $2" | awk '{ if ($1 <= $2) { print 1 } else { print 0 } }')
    val2=$(printf '%s\n' "$CURNUM $3" | awk '{ if ($1 <= $2) { print 1 } else { print 0 } }')

    DataResultStr+=" $1=$CURNUM;$2;$3;0;0"

    if [ $val1 -eq 1 ]; then
        return 0
    fi

    if [[ $val1 -eq 0 && $val2 -eq 1 ]]; then
        _detail="$1当前值为${CURNUM}超过告警值${2}"
        WranResultStr="${WranResultStr:+${WranResultStr},}${_detail}"
        _level=1
    else
        _detail="$1当前值为${CURNUM}超过紧急值${3}"
        ErrorResultStr="${ErrorResultStr:+${ErrorResultStr},}${_detail}"
        _level=2
    fi

    # Both kinds of violation register the metric name and its message.
    quota="${quota:+${quota},}$1"
    QUOTASTR="${QUOTASTR}$1=${_detail} "

    return $_level
}

# Choose the final status from the collected messages and hand it to gotErr:
# 2 when ErrorResultStr is non-empty, 1 when only WranResultStr is non-empty,
# 0 when both are empty.
function analysisResult(){
    if [[ -n "$ErrorResultStr" ]]; then
        gotErr 2
    elif [[ -n "$WranResultStr" ]]; then
        gotErr 1
    else
        gotErr 0
    fi
}

# Print the usage text on stdout and exit with status 1.
# Arguments: $1 - optional hint; when non-empty a "请输入<hint>" line is
#            printed before the usage text.
function showHelp(){
    if [[ -n "$1" ]]; then
        printf '%s\n' "请输入$1"
    fi
    printf '%s\n' \
        "check_tfs.sh 可以监听本地的tfs的状态 参数如下" \
        "check_tfs.sh [-w 连接数,CPU,内存,空间使用率] [-c 连接数,CPU,内存,空间使用率] [-b <str>] [-P <str>]" \
        "-w 表示监控警告的值" \
        "-c 表示异常值" \
        "-b <str> tfs的安装目录" \
        "-P <str> namenode的端口号"
    exit 1
}

# Check whether $1 is a non-negative integer or decimal number.
# Accepted: one or more digits, optionally followed by a dot and any number
# of further digits ("12", "1.", "1.5", "1.25").
# Previously only a single fractional digit was allowed, so "1.25" was
# rejected.
# Globals written: isNum (1 = number, 0 = not a number)
# Returns: NOTE the inverted convention that callers rely on -
#          1 when $1 IS a number, 0 when it is not.
function isNumber(){
    if [[ "$1" =~ ^[0-9]+[.]?[0-9]*$ ]]; then
        isNum=1
    else
        isNum=0
    fi
    return $isNum
}

# Test whether $1 >= $2, where both must be numbers according to isNumber.
# Globals written: isLarge (1 when $1 >= $2, else 0; only set once both
#                  arguments passed the number check)
# Returns: 0 when both arguments are numbers and $1 >= $2, otherwise 1.
function isLarge(){
    local _operand

    # isNumber returns 0 for "not a number", so a zero status rejects.
    for _operand in "$1" "$2"; do
        isNumber $_operand
        if [[ $? -eq 0 ]]; then
            return 1
        fi
    done

    isLarge=$(printf '%s\n' "$1 $2" | awk '{ if ($1 >= $2) { print 1 } else { print 0 } }')
    if [ $isLarge -eq 1 ]; then
        return 0
    fi
    return 1
}

# Validate one warning/error limit pair and switch the metric on.
# Arguments: $1 - name of the flag variable to set to 1 (e.g. CONN)
#            $2 - lower (warning) limit
#            $3 - upper (error) limit
# When both limits are non-empty, the upper limit must be >= the lower limit
# according to isLarge; on success the variable named by $1 is set to 1.
# When either limit is empty nothing is checked or set.
# Returns: 1 when the pair is rejected (or $1 is not a valid variable name),
#          otherwise 0.
function checkARG(){
    if [[ -n "$2" && -n "$3" ]]; then
        if ! isLarge "$3" "$2"; then
            return 1
        fi
        # A plain `$1=1` is executed as a command named "<name>=1";
        # printf -v performs the intended indirect assignment.
        printf -v "$1" '%s' 1 || return 1
    fi
    return 0
}

# Validate the limit pairs of all metrics and switch on every metric whose
# pair is valid.
# For each of CONN, CPU, MEMORY and USEAGE (in that order) the globals
# <NAME>MIN and <NAME>MAX are inspected:
#   * both empty      -> metric is skipped
#   * exactly one set -> invalid, return 1
#   * both set        -> <NAME>MAX must be >= <NAME>MIN according to isLarge
#                        (which also requires both to be numbers);
#                        on success <NAME> is set to 1, otherwise return 1
# The connection check previously tested CONNMAX twice and never looked at
# CONNMIN; all four metrics now go through the same pair check.
# Returns: 0 when every supplied pair is valid, 1 at the first invalid pair.
function checkALL(){
    local _metric _min_name _max_name _min _max

    for _metric in CONN CPU MEMORY USEAGE; do
        _min_name="${_metric}MIN"
        _max_name="${_metric}MAX"
        _min=${!_min_name}
        _max=${!_max_name}

        if [[ -z "$_min" && -z "$_max" ]]; then
            continue
        fi
        if [[ -z "$_min" || -z "$_max" ]]; then
            return 1
        fi
        if ! isLarge "$_max" "$_min"; then
            return 1
        fi
        printf -v "$_metric" '%s' 1
    done
    return 0
}

# Split a comma-separated limit list into the per-metric limit variables.
# Arguments: $1 - 1 selects the warning limits (*MIN variables); any other
#                 value selects the error limits (*MAX variables)
#            $2 - "connections,cpu,memory,space-usage"
# Every field is reduced to digits and dots, so decorations such as "%" or
# blanks are dropped while decimal limits survive ("80.5" stays 80.5; it
# used to become 805 because the dot was stripped as well).
# Fields beyond the fourth are ignored; missing fields yield empty values.
# Globals written: CONNMIN/CPUMIN/MEMORYMIN/USEAGEMIN or
#                  CONNMAX/CPUMAX/MEMORYMAX/USEAGEMAX
function parseARG(){
    local _conn _cpu _mem _usage _rest

    IFS=',' read -r _conn _cpu _mem _usage _rest <<< "$2"
    _conn=${_conn//[^0-9.]/}
    _cpu=${_cpu//[^0-9.]/}
    _mem=${_mem//[^0-9.]/}
    _usage=${_usage//[^0-9.]/}

    if [ "$1" -eq 1 ]; then
        # warning limits
        CONNMIN=$_conn
        CPUMIN=$_cpu
        MEMORYMIN=$_mem
        USEAGEMIN=$_usage
    else
        # error limits
        CONNMAX=$_conn
        CPUMAX=$_cpu
        MEMORYMAX=$_mem
        USEAGEMAX=$_usage
    fi
}

# Command-line handling: -b <dir> sets the TFS install directory, -P <port>
# the nameserver port; -h and any unknown option print the usage and exit.
while getopts "b:P:h" arg; do
    case $arg in
        b) TFSHOME=$OPTARG ;;
        P) NAMEPORT=$OPTARG ;;
        h|?) showHelp ;;
    esac
done

# Fall back to the defaults when an option was not given (or given empty).
: "${TFSHOME:=/usr/local/tfs}"
: "${NAMEPORT:=9108}"

# Limit validation is switched off. With checkALL commented out, $? below is
# the status of the preceding default assignment, so the usage branch cannot
# trigger.
#checkALL
if [[ $? == 1 ]]; then
    showHelp "正确参数"
fi


# Main part of the plugin:
#   1. look up the nameserver process
#   2. collect the metrics and print them as JSON
# (the threshold analysis step, analysisResult, is disabled)

isAlive
analysisStat
#analysisResult

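# --- Web-page residue below (blog comment form), not part of the script ---
# 发表回复 (Leave a reply)
#
# 您的电子邮箱地址不会被公开。 必填项已用*标注 (Your email address will not be published. Required fields are marked *)
#
# Captcha Code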