背景
实现 CDH 服务出现异常时能够及时发送告警通知。
脚本实现
#!/bin/bash
#
# CDH service health monitor.
#
# In an endless loop (one round every 3 minutes) this script asks the
# Cloudera Manager REST API for the healthSummary of every service listed,
# one name per line, in the services file. Once enough BAD results have
# accumulated it calls the SMS alarm endpoint with the list of bad services.
#
# Optional environment overrides (defaults are the original hard-coded values):
#   CM_AUTH          "user:password" passed to curl -u for the CM API
#   CM_SERVICES_URL  URL of the cluster's "services" collection
#   SERVICES_FILE    file holding one service name per line
#   ALARM_URL        SMS alarm endpoint
#   ALARM_MOBILE     mobile number that receives the alarm
#
# Requires: curl, jq.

# No `set -e`: a single failed probe must not kill a long-running monitor.
set -u
set -o pipefail

readonly CM_AUTH="${CM_AUTH:-user:passwd}"
readonly CM_SERVICES_URL="${CM_SERVICES_URL:-http://hdh02.c.p.xyidc:7180/api/v33/clusters/EZCluster/services}"
readonly SERVICES_FILE="${SERVICES_FILE:-serveies.txt}"
readonly ALARM_URL="${ALARM_URL:-http://xxxx:8080/alarm/sendSms.do}"
readonly ALARM_MOBILE="${ALARM_MOBILE:-1515813***5}"

# Alarm fires only when both thresholds are reached.
readonly BAD_COUNT_THRESHOLD=6   # total BAD observations since the last reset
readonly BAD_ROUND_THRESHOLD=8   # consecutive rounds containing a BAD service

count=0        # BAD observations accumulated since the last reset
lxcount=0      # consecutive rounds in which at least one service was BAD
sc=0           # 1 if the current round saw at least one BAD service
BADserver=""   # comma-terminated list of BAD services, newest first ("b,a,")

#######################################
# Print the healthSummary of one service as reported by Cloudera Manager.
# Arguments: $1 - service name as it appears in the API URL
# Outputs:   the raw healthSummary string (e.g. BAD) on stdout;
#            "null" if the response has no such field
# Returns:   non-zero if curl or jq fails
#######################################
fetch_health() {
  local service=$1
  # jq -r emits the string without surrounding quotes.
  curl -sS -u "${CM_AUTH}" -X GET "${CM_SERVICES_URL}/${service}" \
    | jq -r '.healthSummary'
}

#######################################
# Send an SMS alarm through the alarm endpoint (HTTP GET).
# Arguments: $1 - message body
# Outputs:   the endpoint's response on stdout
# Returns:   curl's exit status
#######################################
send_alarm() {
  local body=$1
  # -G keeps this a GET request; the body is URL-encoded because it contains
  # non-ASCII text and punctuation that are not valid in a raw query string.
  curl -sS -G "${ALARM_URL}" \
    --data "mobile=${ALARM_MOBILE}" \
    --data-urlencode "body=${body}" \
    --data "type=0" \
    --data "producer=CDH"
}

while true; do
  # Read the list on fd 3 so commands inside the loop cannot consume it
  # through stdin. The `|| [[ -n ... ]]` handles a missing final newline.
  while read -r -u 3 line || [[ -n "${line}" ]]; do
    [[ -n "${line}" ]] || continue   # skip blank lines

    if ! rs=$(fetch_health "${line}"); then
      printf 'warn: could not query health of %s\n' "${line}" >&2
      rs=""   # an unknown status is not counted as BAD
    fi

    if [[ "${rs}" == "BAD" ]]; then
      sc=1
      count=$((count + 1))
      # Exact, comma-delimited membership test: a plain substring/regex match
      # would treat "hive" as already recorded when only "hive2" is listed.
      if [[ ",${BADserver}" == *",${line},"* ]]; then
        echo "------------------------------------------------------------------------------${line} exits"
      else
        BADserver="${line},${BADserver}"
        echo "------------------------------------------------------------------------------BADserver=${BADserver%,}"
      fi
    fi
    echo "-----------------server=${line} healthSummary=${rs} count=${count} lxcount=${lxcount}------------------"
  done 3< "${SERVICES_FILE}"

  if (( sc >= 1 )); then
    lxcount=$((lxcount + 1))
    echo "--------------------------------------------------------------------------------Have some server BAD!!!"
  else
    # A fully healthy round clears all accumulated state.
    echo "--------------------------------------------------------------------------------All server not BAD now"
    count=0
    lxcount=0
    BADserver=""
  fi
  sc=0

  if (( count >= BAD_COUNT_THRESHOLD && lxcount >= BAD_ROUND_THRESHOLD )); then
    message="不得了了,出大事啦,线上环境大数据服务${BADserver%,}状态异常!!!"
    send_alarm "${message}"
    echo "${message}"

    if [[ "${BADserver}" == *hive* ]]; then
      echo "------------------------------------------------------------------------------restart hive "
      msg="大数据线上集群hive服务状态异常,hive服务重启完成"
      # NOTE(review): the original script has no restart command at this
      # point; its `$?` test only saw the status of the assignment above and
      # was therefore always true. Put the real restart command here and
      # report success only when it returns 0.
      echo "${msg} ${BADserver} ${lxcount}"
    fi

    # Restart the counters so the next alarm needs a fresh run of bad rounds.
    # BADserver is intentionally kept until a fully healthy round.
    lxcount=0
    count=0
  fi

  sleep 3m
done

# Only reached if the loop above is ever changed to terminate.
echo "-----------task is stoped!----------------"