check_tomcat.sh查看tomcat信息

  sre

#!/bin/bash
# Global configuration and state shared by the functions of this plugin.
export LANG=en_US.UTF-8

#WARNLIMIT=-1      # warning threshold: a value above it should raise a warning
#ERRORLIMIT=-1     # error threshold: a value above it is treated as an error
logPath=            # directory that holds the access logs
FLAGE=
FLAGEID=
# whether to output FLAGPID
FLAGEOPID=1
# whether to output RUNTIME
RUNTIME=1
# whether to output STARTTIME
STARTTIME=0
CONN=0              # whether to check the connection count
CONNMIN=
CONNMAX=
MEMORY=0            # whether to check memory
MEMORYMIN=
MEMORYMAX=
CPU=0               # whether to check CPU
CPUMIN=
CPUMAX=
THREAD=0            # whether to check threads
THREADMIN=
THREADMAX=
PORT=
CURNUM=             # metric value currently being processed
CORENUM=1
WranResultStr=      # final warning string
ErrorResultStr=     # final error string
DataResultStr=      # performance-parameter string

QUOTASTR=
quota=
tomcat_acquisition_json=""


# Names of the reported metrics, in output order.
keys=(FLAGEPID ConnectionNum Memory CPU Thread RUNTIME STARTTIME accessLog accessLog10)
# echo ${#keys[@]} shows how many metrics there are (cpname not included)
declare -A map=()

# Pre-seed every metric with an empty value so each key is always present
# in the map, even when its collector is never run.
for var in "${keys[@]}"; do
    map[$var]=""
done

# Print the diagnostic that belongs to an error code and terminate the script.
# Arguments: $1 - error code; 3 and 4 have a dedicated message, any other
#                 code prints nothing. Defaults to 1 when omitted.
# Outputs:   the message for the code, on stdout.
# Exits:     always, with the error code as exit status.
function gotErr(){
    local code=${1:-1}

    case "$code" in
        3)
            echo "无法获取到指标,疑似组件故障,请确认并请检查监控脚本和运维平台配置"
            ;;
        4)
            echo "该组件进程为僵尸进程,请确认并请检查该组件状态"
            ;;
    esac

    exit "$code"
}

# Verify that a live (non-zombie) process is listening on $PORT.
# Globals:   PORT (read); CORENUM, FLAGEID, ZOMBIE (written)
# Exits:     through gotErr 3 when no PID can be resolved for the port,
#            through gotErr 4 when that PID is listed as a zombie.
function isAlive(){
    # Count of "physical id" lines in /proc/cpuinfo; 0 when there are none
    # or the file cannot be read.
    CORENUM=$(grep -c "physical id" /proc/cpuinfo 2>/dev/null)
    CORENUM=${CORENUM:-0}

    # Take the last row of the listener table; its 6th column is the process
    # description, e.g. users:(("java",pid=1234,fd=50)). The second
    # comma-separated piece carries the PID.
    FLAGEID=$(ss -lnpt "( sport == :$PORT )" | tail -n 1 | awk '{print $6}' | awk -F ',' '{print $2}')
    # Newer ss versions prefix the number with "pid="; older ones print it bare.
    FLAGEID=${FLAGEID#pid=}
    if [[ -z "$FLAGEID" ]]; then
        gotErr 3
    fi

    # Compare the PID column exactly so that e.g. 123 does not match 1234.
    ZOMBIE=$(ps -A -ostat,pid | awk -v pid="$FLAGEID" '$1 ~ /^[Zz]/ && $2 == pid')
    if [[ -n "$ZOMBIE" ]]; then
        gotErr 4
    fi
}


# Count the established TCP connections whose local port is $PORT.
# Globals:   PORT (read); CURNUM, tomcat_acquisition_json, map (written)
function getConnNum(){
    # Every row that does not contain "Address" (i.e. not the header) is one
    # connection; grep -c prints the count directly, 0 included.
    CURNUM=$(ss -oanp state established "( sport == :$PORT )" | grep -vc "Address")
    # Defensive: drop any padding around the number.
    CURNUM=${CURNUM//[[:space:]]/}
    tomcat_acquisition_json="$tomcat_acquisition_json,'ConnectionNum':$CURNUM"
    map["ConnectionNum"]="$CURNUM"
}

# Sum column 4 of `ps aux` (the %MEM column) for the monitored Tomcat process.
# Globals:   FLAGEID (read); CURNUM, tomcat_acquisition_json, map (written)
function getMemory(){
    # Tolerate a "pid=1234" style value as well as a bare number.
    local pid=${FLAGEID#pid=}

    # Keep only rows that mention both "tomcat" and "bootstrap" and whose PID
    # column equals the target exactly; a substring match on the whole line
    # would also pick up other PIDs and unrelated command lines.
    CURNUM=$(ps aux | awk -v pid="$pid" '
        $2 == pid && /tomcat/ && /bootstrap/ { sum += $4 }
        END { print sum + 0 }')
    tomcat_acquisition_json="$tomcat_acquisition_json,'Memory':$CURNUM"
    map["Memory"]="$CURNUM"
}

# Sum column 3 of `ps aux` (the %CPU column) for the monitored Tomcat process
# and, when CORENUM is a positive integer, divide it by CORENUM.
# Globals:   FLAGEID, CORENUM (read); CURNUM, tomcat_acquisition_json, map (written)
function getCPU(){
    # Tolerate a "pid=1234" style value as well as a bare number.
    local pid=${FLAGEID#pid=}
    # An empty or non-numeric core count disables the division instead of
    # raising a test error.
    local cores=${CORENUM:-0}
    [[ "$cores" =~ ^[0-9]+$ ]] || cores=0

    # Match the PID column exactly (a substring match would also count other
    # PIDs). The division is done in awk so that bc is not required; the
    # result is rounded to two decimals.
    CURNUM=$(ps aux | awk -v pid="$pid" -v cores="$cores" '
        $2 == pid && /tomcat/ && /bootstrap/ { sum += $3 }
        END {
            if (cores > 0) printf "%.2f", sum / cores
            else print sum + 0
        }')
    tomcat_acquisition_json="$tomcat_acquisition_json,'CPU':$CURNUM"
    map["CPU"]="$CURNUM"
}

# Derive the thread count of the monitored process from `ps -mp <pid>`.
# Globals:   FLAGEID (read); CURNUM, tomcat_acquisition_json, map (written)
function getThreadNum(){
    # Tolerate a "pid=1234" style value as well as a bare number.
    local pid=${FLAGEID#pid=}

    # The line count of the listing minus 2 is reported as the thread count
    # (presumably one header row plus one process summary row — TODO confirm
    # against the ps version in use).
    CURNUM=$(ps -mp "$pid" | wc -l | awk '{ print $1 - 2 }')
    tomcat_acquisition_json="$tomcat_acquisition_json,'Thread':$CURNUM"
    map["Thread"]="$CURNUM"
}

# Look up the elapsed running time (ps "etime" column) of the monitored process.
# Globals:   FLAGEID (read); CURNUM, tomcat_acquisition_json, map (written)
function getRuntime(){
    # Tolerate a "pid=1234" style value as well as a bare number.
    local pid=${FLAGEID#pid=}

    # Hand the PID to awk as a variable instead of splicing it into the
    # program text; only the row whose PID column equals it is printed.
    CURNUM=$(ps -eo pid,etime | awk -v pid="$pid" '$1 == pid { print $2 }')
    tomcat_acquisition_json="$tomcat_acquisition_json,'RUNTIME':'$CURNUM'"
    map["RUNTIME"]="$CURNUM"
}

# Look up the start time (ps "lstart" column) of the monitored process.
# Globals:   FLAGEID (read); CURNUM, tomcat_acquisition_json, map (written)
function getStarttime(){
    # Tolerate a "pid=1234" style value as well as a bare number.
    local pid=${FLAGEID#pid=}

    # Fields 2-6 of the matching row are the five tokens of the start time;
    # they are re-joined with single spaces.
    CURNUM=$(ps -eo pid,lstart | awk -v pid="$pid" '$1 == pid { print $2, $3, $4, $5, $6 }')
    tomcat_acquisition_json="$tomcat_acquisition_json,'STARTTIME':'$CURNUM'"
    map["STARTTIME"]="$CURNUM"
}

# Count the lines of today's access log that were written during the last
# minute and whose last field is greater than 1 (per the original note:
# requests that took longer than one second to process).
# Original notes: if the count exceeds the alert value, dump the thread and
# keep the dump file in the folder of the log (not implemented here).
# Globals:   logPath (read); CURNUM, tomcat_acquisition_json, map (written)
function getAccessLog(){
    local day window_start window_end log_file

    # Reported as 0 when no log directory is configured or the file is
    # missing/unreadable.
    CURNUM=0
    if [[ -n "$logPath" ]]; then
        day=$(date +"%Y-%m-%d")
        window_start=$(date -d "1 minutes ago" +"%d/%b/%Y:%H:%M:%S")
        window_end=$(date +"%d/%b/%Y:%H:%M:%S")
        log_file="$logPath/localhost_access_log.$day.txt"
        if [[ -r "$log_file" ]]; then
            # Field 4 is "[dd/Mon/yyyy:HH:MM:SS"; drop the bracket and compare
            # it as a string against the window bounds.
            CURNUM=$(awk -v st="$window_start" -v et="$window_end" '
                { t = substr($4, 2) }
                t >= st && t <= et && $NF > 1 { n++ }
                END { print n + 0 }' "$log_file")
        fi
    fi

    tomcat_acquisition_json="$tomcat_acquisition_json,'accessLog':$CURNUM"
    map["accessLog"]="$CURNUM"
}
# Count the lines of today's access log that were written during the last
# minute and whose last field is greater than 10 (per the original note:
# requests that took longer than ten seconds to process).
# Globals:   logPath (read); CURNUM, tomcat_acquisition_json, map (written)
function getAccessLog10(){
    local day window_start window_end log_file

    # Reported as 0 when no log directory is configured or the file is
    # missing/unreadable.
    CURNUM=0
    if [[ -n "$logPath" ]]; then
        day=$(date +"%Y-%m-%d")
        window_start=$(date -d "1 minutes ago" +"%d/%b/%Y:%H:%M:%S")
        window_end=$(date +"%d/%b/%Y:%H:%M:%S")
        log_file="$logPath/localhost_access_log.$day.txt"
        if [[ -r "$log_file" ]]; then
            # Field 4 is "[dd/Mon/yyyy:HH:MM:SS"; drop the bracket and compare
            # it as a string against the window bounds.
            CURNUM=$(awk -v st="$window_start" -v et="$window_end" '
                { t = substr($4, 2) }
                t >= st && t <= et && $NF > 10 { n++ }
                END { print n + 0 }' "$log_file")
        fi
    fi

    tomcat_acquisition_json="$tomcat_acquisition_json,'accessLog10':$CURNUM"
    map["accessLog10"]="$CURNUM"
}
# Run every metric collector in a fixed order; each one stores its value in
# `map` and appends it to tomcat_acquisition_json.
# Globals:   FLAGEID, logPath (read); CURNUM, tomcat_acquisition_json, map (written)
function analysisStat(){
    getConnNum
    getMemory
    getCPU
    getThreadNum

    # The PID itself is reported as a metric; drop a "pid=" prefix if present.
    CURNUM=${FLAGEID#pid=}
    tomcat_acquisition_json="$tomcat_acquisition_json,'FLAGEPID':'$CURNUM'"
    map["FLAGEPID"]="$CURNUM"

    getRuntime
    getStarttime

    # Skip the access-log metrics when no log directory was given or the
    # "<<log_path>>" placeholder is still in place.
    if [[ -z "$logPath" || "$logPath" == "<<log_path>>" ]]; then
        return
    fi

    getAccessLog
    getAccessLog10
}

# Print the collected metrics as one JSON object on a single line.
# Globals:   keys, map (read)
# Outputs:   {"<key>":"<value>",...,"cpname":"tomcat"} followed by a newline.
function analysisResult(){
    local key value
    local single_quote="'" double_quote='"'

    printf '{'
    for key in "${keys[@]}"; do
        # As in the original, every single quote in a value is turned into a
        # double quote. NOTE(review): a value containing quotes would then
        # break the JSON syntax — confirm the consumer expects this.
        value=${map[$key]//"$single_quote"/$double_quote}
        # printf '%s' emits the value verbatim (no backslash interpretation).
        printf '"%s":"%s",' "$key" "$value"
    done
    printf '"cpname":"tomcat"}\n'
}

# Print an optional "please supply ..." hint followed by the usage text,
# then terminate the script.
# Arguments: $1 - (optional) name of the missing item, inserted into the hint
# Outputs:   hint and usage text on stdout
# Exits:     always, with status 1
# NOTE(review): the usage text still advertises -w and -c, but the getopts
# string used by this script is "p:l:h" — confirm whether the text is stale.
function showHelp(){
    if [[ -n "$1" ]]; then
        echo "请输入$1"
    fi
    echo "check_tomcat.sh 可以监听本地的tomcat的状态 参数如下"
    echo "check_tomcat.sh [-w 连接数,CPU,内存,线程数] [-c 连接数,CPU,内存,线程数] -p <port>"
    echo "-w 表示监控警告的值"
    echo "-c 表示异常值"
    echo "-p 表示tomcat端口号"
    echo "-l 表示日志路径"
    exit 1
}




# Command-line parsing: -p <port> (required), -l <log directory>, -h for help.
# Any unrecognised option prints the help text and exits.
while getopts "p:l:h" arg; do
    case "$arg" in
        h)
            showHelp
            ;;
        p)
            PORT=$OPTARG
            ;;
        l)
            logPath=$OPTARG
            ;;
        *)
            showHelp
            ;;
    esac
done

# The port is mandatory: without it there is nothing to look up.
if [[ -z "$PORT" ]]; then
    showHelp "特征项"
fi



# Main part of the plugin:
#   1. make sure the process is alive
#   2. collect the metrics
#   3. print the result
isAlive
analysisStat
analysisResult

LEAVE A COMMENT

Captcha Code