
#! /bin/bash
# Force a fixed locale for the tools invoked below.
export LANG="en_US.UTF-8"

# Thresholds (currently disabled):
#WARNLIMIT=-1   # warning threshold: values above it should raise a warning
#ERRORLIMIT=-1  # error threshold: values above it are treated as errors

# Values supplied on the command line (-p / -I).
PORT=""
IP=""

# PID of the monitored process; filled in by isAlive.
FLAGEID=""

# Feature switches.
RUNTIME=1     # whether to output RUNTIME
STARTTIME=0   # whether to output STARTTIME
CONN=0        # whether to check the connection count
MEMORY=0      # whether to check memory
CPU=0         # whether to check CPU
THREADNUM=0   # whether to check threads

# Divisor used by getCPU; isAlive overwrites it from /proc/cpuinfo.
CORENUM=1

# Working state shared between the functions below.
CURNUM=""          # the metric value currently being processed
WranResultStr=""   # final warning string
ErrorResultStr=""  # final error string
DataResultStr=""   # accumulated metrics string
quota=""
QUOTASTR=""
#######################################
# Print the plugin result for a status code and terminate the script.
# Globals:   DataResultStr (read) - comma-separated 'key':'value' pairs
# Arguments: $1 - status code: 0 emits the JSON payload, 3 reports that the
#                 metrics could not be collected, 4 reports a zombie process;
#                 any other code prints nothing
# Outputs:   the JSON document or a diagnostic message on stdout
# Returns:   never returns; exits with $1, except that code 4 exits with 3
#######################################
gotErr() {
  local status=$1
  local result

  if [ "$status" -eq 0 ]; then
    result="{'cpname':'elasticsearch',${DataResultStr}}"
    # The pairs are assembled with single quotes; turn them into double
    # quotes so the output is JSON.
    result=${result//\'/\"}
    # Quoted printf: an unquoted echo would word-split the payload, collapsing
    # whitespace inside values and glob-expanding wildcard characters.
    printf '%s\n' "$result"
  elif [ "$status" -eq 3 ]; then
    echo "无法获取到指标,疑似组件故障,请确认并请检查监控脚本和运维平台配置"
  elif [ "$status" -eq 4 ]; then
    echo "该组件进程为僵尸进程,请确认并请检查该组件状态"
    exit 3
  fi
  exit "$status"
}
#######################################
# Check that the monitored service is alive and record its PID.
# Globals:   PORT (read); CORENUM, FLAGEID, CURNUM (written)
# Outputs:   appends the 'FLAGEPID' entry through parseFileds
# Returns:   does not return when the check fails: calls gotErr 3 if no
#            listening java socket matches PORT, gotErr 4 if that PID is a
#            zombie
#######################################
isAlive() {
  # Number of "physical id" lines in /proc/cpuinfo; getCPU divides by it.
  CORENUM=$(grep -c "physical id" /proc/cpuinfo 2>/dev/null)
  CORENUM=${CORENUM:-0}

  # PID of the first listening java socket whose netstat line contains PORT
  # as a whole word (the last column has the form "PID/program").
  FLAGEID=$(netstat -lnp | grep java | grep -w -- "$PORT" \
    | awk '{print $NF}' | awk -F '/' '{print $1}' | head -n 1)
  if [[ -z "$FLAGEID" ]]; then
    gotErr 3
  fi

  # Compare the PID column exactly: a substring grep would flag PID 123 as a
  # zombie whenever an unrelated zombie such as 1234 exists.
  local zombie
  zombie=$(ps -A -ostat,pid | awk -v pid="$FLAGEID" '$1 ~ /^[Zz]/ && $2 == pid')
  if [[ -n "$zombie" ]]; then
    gotErr 4
  fi

  CURNUM=$FLAGEID
  parseFileds "FLAGEPID"
}
#######################################
# Count the ESTABLISHED connections owned by the monitored process.
# Globals:   FLAGEID (read) - PID of the monitored process
#            CURNUM (written) - number of matching netstat lines
#######################################
getConnNum() {
  # Match the state column and the "PID/program" column exactly. A plain
  # grep for the PID also hits ports, addresses and other PIDs that merely
  # contain the same digits.
  CURNUM=$(netstat -apn | awk -v owner="${FLAGEID}/" '
    $6 == "ESTABLISHED" && index($7, owner) == 1 { n++ }
    END { print n + 0 }')
}
#######################################
# Read the memory usage of the monitored process.
# Column 4 of `ps aux` is the memory percentage; column 2 is the PID.
# Globals:   FLAGEID (read) - PID of the monitored process
#            CURNUM (written) - summed column-4 value, 0 when nothing matches
#######################################
getMemory() {
  # Select rows by exact PID. Grepping for the PID also sums unrelated
  # processes whose line contains those digits, including the grep itself.
  CURNUM=$(ps aux | awk -v pid="$FLAGEID" '
    $2 == pid { sum += $4 }
    END { print sum + 0 }')
}
#######################################
# Read the CPU usage of the monitored process.
# Column 3 of `ps aux` is the CPU percentage; column 2 is the PID.
# Globals:   FLAGEID (read) - PID of the monitored process
#            CORENUM (read) - divisor; when it is 0 or empty the raw sum is
#                             reported unchanged
#            CURNUM (written) - usage divided by CORENUM, two decimals
#######################################
getCPU() {
  # Select rows by exact PID (a substring grep also sums unrelated
  # processes) and divide inside awk, so `bc` is not required. The quotient
  # is rounded to two decimals.
  CURNUM=$(ps aux | awk -v pid="$FLAGEID" -v cores="$CORENUM" '
    $2 == pid { sum += $3 }
    END {
      if (cores + 0 != 0) {
        printf "%.2f\n", sum / cores
      } else {
        print sum + 0
      }
    }')
}
#######################################
# Count the threads of the monitored process.
# Globals:   FLAGEID (read) - PID of the monitored process
#            CURNUM (written) - line count of `ps -mp PID` minus 2
#######################################
getThreadNum() {
  # Two lines are not threads (presumably the header and the process
  # summary line), so they are subtracted from the total.
  CURNUM=$(ps -mp "$FLAGEID" | awk 'END { print NR - 2 }')
}
#######################################
# Read how long the monitored process has been running.
# Globals:   FLAGEID (read) - PID of the monitored process
#            CURNUM (written) - etime column of `ps -eo pid,etime` for that PID
#######################################
getRuntime() {
  CURNUM=$(ps -eo pid,etime | awk -v pid="$FLAGEID" '$1 == pid { print $2 }')
}
#######################################
# Read the start time of the monitored process.
# Globals:   FLAGEID (read) - PID of the monitored process
#            CURNUM (written) - the five lstart fields of
#                               `ps -eo pid,lstart`, joined by single spaces
#######################################
getStarttime() {
  CURNUM=$(ps -eo pid,lstart \
    | awk -v pid="$FLAGEID" '$1 == pid { print $2, $3, $4, $5, $6 }')
}
#######################################
# Fetch /_cluster/stats and store it as one comma-separated item per line.
# Globals:   IP, PORT (read); clusterStats (written)
# Outputs:   overwrites clusterStats.txt in the current directory
#######################################
getclusterStats() {
  # Strip every brace, then print each comma-separated item on its own line
  # so the result can be searched line by line.
  # NOTE(review): curl is called without a timeout; an unresponsive node
  # would block the plugin.
  clusterStats=$(curl "http://${IP}:${PORT}/_cluster/stats" 2>/dev/null \
    | sed 's/[{}]//g' \
    | awk '{ count = split($0, items, ","); for (k = 1; k <= count; k++) print items[k] }')

  # The redirection creates the file if it does not exist yet.
  printf '%s\n' "$clusterStats" > clusterStats.txt
}
#######################################
# Fetch /_nodes/stats and store it as one comma-separated item per line.
# Globals:   IP, PORT (read); nodesStats (written)
# Outputs:   overwrites nodesStats.txt in the current directory
#######################################
get_node_stats() {
  # Strip every brace, then print each comma-separated item on its own line
  # so the result can be searched line by line.
  # NOTE(review): curl is called without a timeout; an unresponsive node
  # would block the plugin.
  nodesStats=$(curl "http://${IP}:${PORT}/_nodes/stats" 2>/dev/null \
    | sed 's/[{}]//g' \
    | awk '{ count = split($0, items, ","); for (k = 1; k <= count; k++) print items[k] }')

  # The redirection creates the file if it does not exist yet.
  printf '%s\n' "$nodesStats" > nodesStats.txt
}
#######################################
# Helper: print the Nth number matched by a pattern in a stats file.
# Arguments: $1 - file to search
#            $2 - basic regular expression handed to `grep -o`
#            $3 - 1-based index of the match to print (default 1)
# Outputs:   the digits of that match on stdout, nothing when it is absent
#######################################
_es_nth_number() {
  local file=$1 pattern=$2 nth=${3:-1}
  grep -o -- "$pattern" "$file" \
    | sed 's/[^0-9.]//g' \
    | awk -v n="$nth" 'NR == n'
}

#######################################
# Helper: record a counter metric, defaulting to 0 when it is not found.
# Arguments: $1 - metric name, $2 - file, $3 - pattern, $4 - match index
# Globals:   CURNUM (written)
#######################################
_es_emit_count() {
  CURNUM=$(_es_nth_number "$2" "$3" "${4:-1}")
  CURNUM=${CURNUM:-0}
  parseFileds "$1"
}

#######################################
# Helper: record a millisecond metric converted to seconds (two decimals).
# A missing value is reported as 0.00.
# Arguments: $1 - metric name, $2 - file, $3 - pattern, $4 - match index
# Globals:   CURNUM (written)
#######################################
_es_emit_seconds() {
  local millis
  millis=$(_es_nth_number "$2" "$3" "${4:-1}")
  CURNUM=$(awk -v ms="$millis" 'BEGIN { printf "%.2f\n", ms / 1000 }')
  parseFileds "$1"
}

#######################################
# Collect every metric and append it to the result through parseFileds:
# first the process metrics, then the Elasticsearch metrics read from the
# files written by getclusterStats and get_node_stats.
# Globals:   IP (read); CURNUM (written)
# Outputs:   rewrites clusterStats.txt and nodesStats.txt (via the getters)
# Notes:     when IP is empty or the placeholder "<<ip>>", nothing is fetched
#            and every Elasticsearch metric is reported with an empty value.
#######################################
analysisStat() {
  local cluster_file="clusterStats.txt"
  local nodes_file="nodesStats.txt"
  local name status

  getConnNum
  parseFileds "ConnectionNum"
  getMemory
  parseFileds "Memory"
  getCPU
  parseFileds "CPU"
  getThreadNum
  parseFileds "Thread"
  getRuntime
  parseFileds "RUNTIME"
  getStarttime
  parseFileds "STARTTIME"

  if [[ -z "${IP}" || "${IP}" == "<<ip>>" ]]; then
    CURNUM=""
    for name in \
      totalShards primariesShards filteEvictions filterSize clusterStatus \
      docsCount docsDelete fielddataEvictions fielddataSize \
      flushToal flushToalTime \
      indexingDeleteCurrent indexingDeleteTime indexingDeleteTotal \
      indexingIndexCurrent indexingIndexTime indexingIndexTotal \
      jvmGcCollectorsOldCollectionTime jvmGcCollectorsOldCount \
      jvmGcCollectorsYoungCollectionTime jvmGcCollectorsYoungCount \
      jvmMemHeapCommitted jvmMemHeapMax jvmMemHeapUsed \
      jvmMemNonHeapCommitted jvmMemNonHeapUsed \
      jvmThreadsCount jvmThreadsPeakCount; do
      parseFileds "$name"
    done
    return
  fi

  # Fetch the Elasticsearch state.
  getclusterStats
  get_node_stats

  # Several metrics below are picked by position (2nd or 3rd match of a
  # shared key), so they depend on the order of keys in the response.
  # NOTE(review): confirm that order against the Elasticsearch version in use.

  # ---- cluster stats ----
  _es_emit_count "totalShards" "$cluster_file" '"shards":"total":[0-9]\+'
  _es_emit_count "primariesShards" "$cluster_file" '"primaries":[0-9]\+'
  _es_emit_count "filteEvictions" "$cluster_file" '"evictions":[0-9]\+' 2
  _es_emit_count "filterSize" "$cluster_file" \
    '"filter_cache":"memory_size_in_bytes":[0-9]\+'

  # Status mapping: green -> 0, yellow -> 2, anything else -> 4.
  # NOTE(review): a missing status (e.g. the request failed) also maps to 0.
  status=$(grep -w '"status":' "$cluster_file" | awk -F '[":]+' '{print $3}')
  if [[ "$status" == "green" || -z "$status" ]]; then
    CURNUM=0
  elif [[ "$status" == "yellow" ]]; then
    CURNUM=2
  else
    CURNUM=4
  fi
  parseFileds "clusterStatus"

  # The previous pattern contained a stray "memory_size_in_bytes" key and
  # could never match, so docsCount was always reported as 0.
  _es_emit_count "docsCount" "$cluster_file" '"docs":"count":[0-9]\+'
  _es_emit_count "docsDelete" "$cluster_file" '"deleted":[0-9]\+'
  _es_emit_count "fielddataEvictions" "$cluster_file" '"evictions":[0-9]\+'
  _es_emit_count "fielddataSize" "$cluster_file" \
    '"fielddata":"memory_size_in_bytes":[0-9]\+'

  # ---- node stats ----
  _es_emit_count "flushToal" "$nodes_file" '"flush":"total":[0-9]\+'
  _es_emit_seconds "flushToalTime" "$nodes_file" \
    '"total_time_in_millis":[0-9]\+' 3
  _es_emit_count "indexingDeleteCurrent" "$nodes_file" \
    '"delete_current":[0-9]\+'
  _es_emit_seconds "indexingDeleteTime" "$nodes_file" \
    '"delete_time_in_millis":[0-9]\+'
  _es_emit_count "indexingDeleteTotal" "$nodes_file" '"delete_total":[0-9]\+'
  _es_emit_count "indexingIndexCurrent" "$nodes_file" \
    '"index_current":[0-9]\+'
  _es_emit_seconds "indexingIndexTime" "$nodes_file" \
    '"index_time_in_millis":[0-9]\+'
  _es_emit_count "indexingIndexTotal" "$nodes_file" \
    '"indexing":"index_total":[0-9]\+'
  _es_emit_seconds "jvmGcCollectorsOldCollectionTime" "$nodes_file" \
    '"collection_time_in_millis":[0-9]\+' 2
  _es_emit_count "jvmGcCollectorsOldCount" "$nodes_file" \
    '"old":"collection_count":[0-9]\+'
  _es_emit_seconds "jvmGcCollectorsYoungCollectionTime" "$nodes_file" \
    '"collection_time_in_millis":[0-9]\+'
  _es_emit_count "jvmGcCollectorsYoungCount" "$nodes_file" \
    '"gc":"collectors":"young":"collection_count":[0-9]\+'
  _es_emit_count "jvmMemHeapCommitted" "$nodes_file" \
    '"heap_committed_in_bytes":[0-9]\+'
  _es_emit_count "jvmMemHeapMax" "$nodes_file" '"heap_max_in_bytes":[0-9]\+'
  # Despite its name this metric carries heap_used_percent.
  _es_emit_count "jvmMemHeapUsed" "$nodes_file" '"heap_used_percent":[0-9]\+'
  _es_emit_count "jvmMemNonHeapCommitted" "$nodes_file" \
    '"non_heap_committed_in_bytes":[0-9]\+'
  _es_emit_count "jvmMemNonHeapUsed" "$nodes_file" \
    '"non_heap_used_in_bytes":[0-9]\+'
  _es_emit_count "jvmThreadsCount" "$nodes_file" '"threads":"count":[0-9]\+'
  _es_emit_count "jvmThreadsPeakCount" "$nodes_file" '"peak_count":[0-9]\+'
}
#######################################
# Append one 'name':'value' pair to the result string.
# Globals:   CURNUM (read) - value to record
#            DataResultStr (read/written) - comma-separated pairs so far
# Arguments: $1 - metric name
#######################################
parseFileds() {
  # The comma is inserted only when a previous pair already exists.
  DataResultStr="${DataResultStr:+${DataResultStr},}'$1':'${CURNUM}'"
}
#######################################
# Report the final result: hands status 0 to gotErr.
#######################################
analysisResult() {
  gotErr 0
}
#######################################
# Print the usage text and terminate the script.
# Arguments: $1 - optional name of the missing input; when given, a
#                 "please enter <name>" line is printed first
# Outputs:   usage text on stdout
# Returns:   never returns; exits with status 1
#######################################
showHelp() {
  if [[ -n "$1" ]]; then
    echo "请输入$1"
  fi
  # The description used to name "flume", a leftover from another plugin;
  # this script monitors elasticsearch.
  echo "check_elasticsearch.sh 可以监听本地的elasticsearch的状态 参数如下"
  echo "check_elasticsearch.sh [-p <port>] [-I <ip>]"
  echo "-p <port> 特征字符串 表示监控哪个elasticsearch端口"
  echo "-I <ip> ip地址"
  exit 1
}
# Command-line parsing: -p <port> and -I <ip> take a value; -h and any
# unrecognised option print the usage text (showHelp exits).
while getopts "p:I:h" arg; do
  case "$arg" in
    p) PORT=$OPTARG ;;
    I) IP=$OPTARG ;;
    *) showHelp ;;
  esac
done

# The port is mandatory.
[[ -n "$PORT" ]] || showHelp "端口号"

# Main part of the plugin:
#   1. make sure the service is alive,
#   2. collect the metrics,
#   3. print the result.
isAlive
analysisStat
analysisResult