#!/bin/bash
# Process-monitoring plugin: shared state used by the functions below.
# The interpreter line has to be the very first line of the file; when it is
# preceded by anything (even a blank line) the kernel does not treat it as a
# shebang and the script may end up being run by a non-bash shell.
export LANG=en_US.UTF-8
CURNUM=           # most recently collected metric value
DataResultStr=    # accumulated performance-data string
FLAGE=            # comma-separated feature string split by isAlive; never assigned a value here
FLAGEID=          # PID of the monitored process, filled in by isAlive
# Whether to output FLAGPID (not read by any active code visible here)
FLAGEOPID=1
# Whether to output RUNTIME (not read by any active code visible here)
RUNTIME=1
# Whether to output STARTTIME (not read by any active code visible here)
STARTTIME=0
CORENUM=1         # divisor for the CPU percentage; recomputed by isAlive
PORT=             # listening port to monitor, supplied with -p
# Accumulator of ",'key':'value'" fragments that analysisStat turns into JSON
programalive_acquisition_json=""
# Print the status line that belongs to a plugin state and terminate.
# Globals:   DataResultStr (read) - appended after "|" for state 0
# Arguments: $1 - state / exit code; 0, 3 and 4 have a message attached
# Outputs:   one status line on stdout for 0, 3 or 4; nothing otherwise
# Exits:     always, using $1 as the exit status
function gotErr(){
  local state=$1
  case "$state" in
    0)
      echo "mailstatedes=各项指标恢复正常 statedes=各项指标恢复正常|$DataResultStr"
      ;;
    3)
      echo "无法获取到指标,疑似组件故障,请确认并请检查监控脚本和运维平台配置"
      ;;
    4)
      echo "该组件进程为僵尸进程,请确认并请检查该组件状态"
      ;;
  esac
  # Left unquoted so a missing argument degrades to a plain `exit`.
  exit $state
}
# Determine whether the monitored service is alive.
# Finds the PID that owns the socket bound to $PORT, rejects zombie
# processes and records the PID in the JSON accumulator.
# Globals:   PORT, FLAGE (read)
#            CORENUM, FF, PP, FLAGEID, ZOMBIE,
#            programalive_acquisition_json (written)
# Exits:     through gotErr 3 when no owning PID is found for the port,
#            through gotErr 4 when that PID is in zombie state.
function isAlive(){
  # Number of "physical id" rows in /proc/cpuinfo; falls back to 0 when the
  # file cannot be read so later numeric tests never see an empty value.
  CORENUM=$(grep -c "physical id" /proc/cpuinfo 2>/dev/null)
  CORENUM=${CORENUM:-0}

  # First two comma-separated fields of FLAGE. Nothing visible here reads
  # FF/PP; they are still populated in case other tooling relies on them.
  FF=$(awk -F ',' '{print $1}' <<<"$FLAGE")
  PP=$(awk -F ',' '{print $2}' <<<"$FLAGE")

  # Match the port against the local-address column only (not anywhere on the
  # line) and stop at the first owner, so that a service bound on both tcp and
  # tcp6 still yields exactly one PID. tcp rows carry a State column, which
  # pushes "PID/Program" from field 6 to field 7.
  FLAGEID=$(netstat -lnp | awk -v port="$PORT" '
    $1 ~ /^(tcp|udp)/ {
      n = split($4, addr, ":")
      if (addr[n] != port) next
      pidcol = ($1 ~ /^tcp/) ? $7 : $6
      split(pidcol, owner, "/")
      if (owner[1] ~ /^[0-9]+$/) { print owner[1]; exit }
    }')
  if [[ -z "$FLAGEID" ]]; then
    gotErr 3
  fi

  # Ask ps for the state column of this PID; a state starting with Z is a
  # zombie. (Filtering a PID-only listing for "Z" can never match.)
  ZOMBIE=$(ps -o stat= -p "$FLAGEID" | awk '$1 ~ /^[Zz]/ {print $1}')
  if [[ -n "$ZOMBIE" ]]; then
    gotErr 4
  fi

  programalive_acquisition_json="$programalive_acquisition_json,'FLAGEPID':'$FLAGEID'"
}
# Collect the number of established connections of the monitored process.
# Globals:   FLAGEID (read); CURNUM, programalive_acquisition_json (written)
function getConnNum(){
  # Count only rows whose state column is ESTABLISHED and whose
  # "PID/Program" column carries exactly this PID. A plain substring grep
  # would also count rows where the digits merely appear in a port number
  # or inside a longer PID.
  CURNUM=$(netstat -apn | awk -v pid="$FLAGEID" '
    $6 == "ESTABLISHED" {
      split($7, owner, "/")
      if (owner[1] == pid) count++
    }
    END { print count + 0 }')
  programalive_acquisition_json="$programalive_acquisition_json,'connnum':'$CURNUM'"
}
# Collect the memory share of the monitored process.
# Column 4 of `ps aux` is %MEM; it is summed over the rows of this PID.
# Globals:   FLAGEID (read); CURNUM, programalive_acquisition_json (written)
function getMemory(){
  # Compare the PID column exactly. Grepping the whole line for the PID would
  # also add up unrelated processes whose PID or command line merely contains
  # the same digits (including the grep process itself).
  CURNUM=$(ps aux | awk -v pid="$FLAGEID" '$2 == pid { sum += $4 } END { print sum + 0 }')
  programalive_acquisition_json="$programalive_acquisition_json,'memory':'$CURNUM'"
}
# Collect the CPU usage of the monitored process.
# Column 3 of `ps aux` is %CPU; it is summed over the rows of this PID and,
# when CORENUM is non-zero, divided by CORENUM.
# Globals:   FLAGEID, CORENUM (read)
#            CURNUM, programalive_acquisition_json (written)
function getCPU(){
  # The PID column is compared exactly so unrelated processes that merely
  # contain the same digits are not added in. Division and formatting happen
  # inside awk: the scaled value is rounded to two decimals and no external
  # calculator is needed. A CORENUM of 0 (or empty) leaves the raw sum as is.
  CURNUM=$(ps aux | awk -v pid="$FLAGEID" -v cores="$CORENUM" '
    $2 == pid { sum += $3 }
    END {
      if (cores + 0 != 0) printf "%.2f", sum / cores
      else print sum + 0
    }')
  programalive_acquisition_json="$programalive_acquisition_json,'cpu':'$CURNUM'"
}
# Collect the number of threads of the monitored process.
# Takes the row count of `ps -mp <pid>` and subtracts 2 — presumably the
# header row and the per-process summary row, leaving one row per thread.
# Globals:   FLAGEID (read); CURNUM, programalive_acquisition_json (written)
function getThreadNum(){
  local row_total
  row_total=$(ps -mp $FLAGEID | wc -l)
  CURNUM=$(( row_total - 2 ))
  programalive_acquisition_json="${programalive_acquisition_json},'threadnum':'${CURNUM}'"
}
# Collect how long the monitored process has been running.
# `ps -eo pid,etime` lists every PID with its elapsed time; the second
# column of the row whose first column equals FLAGEID is kept.
# Globals:   FLAGEID (read); CURNUM, programalive_acquisition_json (written)
function getRuntime(){
  CURNUM=$(ps -eo pid,etime \
    | awk -v pid="$FLAGEID" '$1 == pid { print $2 }')
  programalive_acquisition_json="${programalive_acquisition_json},'runtime':'${CURNUM}'"
}
# Collect the start time of the monitored process.
# `ps -eo pid,lstart` lists every PID with its start timestamp; columns 2-6
# of the row whose first column equals FLAGEID are joined with single spaces.
# Globals:   FLAGEID (read); CURNUM, programalive_acquisition_json (written)
function getStarttime(){
  CURNUM=$(ps -eo pid,lstart \
    | awk -v pid="$FLAGEID" '$1 == pid { print $2, $3, $4, $5, $6 }')
  programalive_acquisition_json="${programalive_acquisition_json},'starttime':'${CURNUM}'"
}
# Run every metric collector and print the combined result as one JSON line.
# Connection count, memory, CPU, thread count, runtime and start time are
# always collected; no on/off switch is consulted.
# Globals:   programalive_acquisition_json (read; extended by the collectors)
# Outputs:   a single JSON object on stdout, with "cpname" fixed to "hbase"
function analysisStat(){
  getConnNum
  getMemory
  getCPU
  getThreadNum
  getRuntime
  getStarttime

  local result
  local single_quote="'" double_quote='"'
  # Every fragment is appended with a leading comma; "#*," removes the text up
  # to and including the first comma so the object does not contain ",,".
  result="{'cpname':'hbase',${programalive_acquisition_json#*,}}"
  # Swap the quote style and print the string verbatim: everything is quoted
  # so values are neither word-split (whitespace collapsed) nor glob-expanded.
  printf '%s\n' "${result//$single_quote/$double_quote}"
}
# Append one entry of the form " <name>=<CURNUM>;<$2>;<$3>;0;0" to the
# performance-data string.
# Globals:   CURNUM (read); DataResultStr (appended to)
# Arguments: $1 - metric name
#            $2, $3 - copied verbatim into the entry (presumably warning and
#                     critical thresholds — confirm against the consumer)
function parseTimeFileds(){
  # Only the accumulator is updated; no helper variables are created, so
  # nothing besides DataResultStr is written to the global scope.
  DataResultStr="$DataResultStr $1=$CURNUM;$2;$3;0;0"
}
# Append " <name>=<CURNUM>" to the performance-data string.
# Globals:   CURNUM (read); DataResultStr (appended to)
# Arguments: $1 - metric name
function parseFileds(){
  local metric_name=$1
  DataResultStr+=" ${metric_name}=${CURNUM}"
}
# Evaluate the result and report the state: hands state 0 to gotErr.
function analysisResult(){
  local -r ok_state=0
  gotErr "$ok_state"
}
# Print the usage text and terminate the script.
# Arguments: $1 - optional name of a missing input; when non-empty, a line
#                 asking for it is printed before the usage text
# Outputs:   usage text on stdout
# Exits:     always, with status 1
function showHelp(){
  local missing_item=$1
  if [[ -n "$missing_item" ]]; then
    printf '%s\n' "请输入$missing_item"
  fi
  printf '%s\n' \
    "check_hbase.sh 用于监控进程的存活状态,并收集CPU,内存,连接数及线程数 参数如下" \
    "check_hbase.sh -p <port>" \
    "-p <port> 端口号"
  exit 1
}
# Entry point: parse the command line, then probe the service and report.
# Arguments: "$@" - script arguments; -p <port> is required, -h prints usage
# Globals:   PORT (written)
function main(){
  local opt
  while getopts "p:h" opt; do
    case $opt in
      p)
        PORT=$OPTARG
        ;;
      *)
        # -h and any option getopts rejects both end in the usage text.
        showHelp
        ;;
    esac
  done

  # A port is mandatory; showHelp exits the script.
  [[ -n "$PORT" ]] || showHelp "特征项"

  # Main part of the plugin:
  # 1. make sure the service is alive
  isAlive
  # 2. collect and print the metrics
  analysisStat
  # analysisResult is not invoked here.
}
main "$@"