在运行 Spark 任务时,可能因为某些异常导致任务没有正常退出,一直处于假死状态。这就需要我们通过一种机制来发现这样的情况。
#!/bin/bash
#
# Finds YARN applications in the RUNNING state whose run time exceeds a given
# number of hours and sends an SMS alarm for each of them. Applications whose
# listing line contains "stream" are ignored. An overdue application named
# exactly "test_parquet" is additionally killed before the alarm is sent.
#
# Usage:   <script> <max_hours>
#   max_hours  non-negative integer: allowed run time in hours.
#
# Requires the `yarn` and `curl` commands on PATH.
# Exit status: 0 on success, 1 if the application list cannot be fetched,
#              2 on a usage error.

set -u

readonly ALARM_ENDPOINT='http://xxxx:8080/alarm/sendSms.do'
readonly ALARM_MOBILE='1515813****'
readonly AUTO_KILL_NAME='test_parquet'

# Prints a message to stderr and aborts the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Prints a non-fatal diagnostic to stderr.
warn() {
  printf 'warn: %s\n' "$*" >&2
}

#######################################
# Extracts one value from a "Key : value" report read on stdin.
# Only the first colon separates key and value, so values that themselves
# contain ':' are returned in full. The key must match exactly.
# Arguments: $1 - key to look up (e.g. Application-Name)
# Outputs:   the trimmed value of the first matching line, or nothing
#######################################
report_field() {
  awk -v key="$1" '
    {
      pos = index($0, ":")
      if (pos == 0) next
      k = substr($0, 1, pos - 1)
      gsub(/^[ \t]+|[ \t]+$/, "", k)
      if (k != key) next
      v = substr($0, pos + 1)
      gsub(/^[ \t]+|[ \t]+$/, "", v)
      print v
      exit
    }'
}

#######################################
# Sends one SMS alarm through the HTTP alarm service.
# The message is URL-encoded by curl; -G turns the data into a GET query
# string, so the request keeps the shape
# ...sendSms.do?mobile=..&type=0&producer=CDH&body=..
# Arguments: $1 - message text
# Outputs:   the service response on stdout
#######################################
send_alarm() {
  curl -sS -G \
    -d "mobile=${ALARM_MOBILE}" \
    -d 'type=0' \
    -d 'producer=CDH' \
    --data-urlencode "body=$1" \
    "$ALARM_ENDPOINT" \
    || warn "failed to send alarm: $1"
}

#######################################
# Checks a single application and raises an alarm if it is overdue.
# Arguments: $1 - application id taken from the listing
#            $2 - allowed run time in hours (used in the alarm text)
#            $3 - allowed run time in milliseconds
#            $4 - current time in milliseconds since the epoch
# Returns:   0 always; problems with one application are only logged so
#            that the remaining applications are still checked
#######################################
check_app() {
  local -r app_id=$1 max_hours=$2 limit_ms=$3 now_ms=$4
  local report id name start_ms deadline_ms body

  # The report is kept in a variable rather than a shared /tmp file.
  # stdin is detached so the child cannot eat the caller's input.
  if ! report=$(yarn application -status "$app_id" </dev/null); then
    warn "cannot get status of ${app_id}, skipping"
    return 0
  fi

  id=$(report_field 'Application-Id' <<<"$report")
  name=$(report_field 'Application-Name' <<<"$report")
  start_ms=$(report_field 'Start-Time' <<<"$report")

  if [[ -z "$id" || ! "$start_ms" =~ ^[0-9]+$ ]]; then
    warn "unexpected status report for ${app_id}, skipping"
    return 0
  fi

  # Start-Time is compared against "now" in milliseconds.
  deadline_ms=$((10#$start_ms + limit_ms))

  printf '%s--%s--%s--%s--%s\n' \
    "$id" "$name" "$start_ms" "$now_ms" "$limit_ms"
  printf '%s--%s\n' "$start_ms" "$limit_ms"
  printf 'Time_numdate:%s\n' "$deadline_ms"
  printf 'today:%s\n' "$now_ms"

  if ((deadline_ms >= now_ms)); then
    printf '%s 大于 %s 未超时\n' "$deadline_ms" "$now_ms"
    return 0
  fi

  body="任务运行超时异常,任务id:${id},任务名称:${name},任务运行已超过${max_hours}小时"
  if [[ "$name" == "$AUTO_KILL_NAME" ]]; then
    # Only claim the application was killed when the kill succeeded.
    if yarn application -kill "$id" </dev/null; then
      body+=",该任务为Zeppelin任务,已被kill"
    else
      warn "failed to kill ${id}"
    fi
  fi
  send_alarm "$body"
}

#######################################
# Entry point: validates the argument, lists running applications and
# checks each of them.
# Arguments: $1 - allowed run time in hours
#######################################
main() {
  if (($# < 1)) || [[ ! "$1" =~ ^[0-9]+$ ]]; then
    printf 'Usage: %s <max_hours>\n' "${0##*/}" >&2
    exit 2
  fi

  # 10# forces base 10 so that an argument such as "08" is not read as octal.
  local -r max_hours=$((10#$1))
  local -r limit_ms=$((max_hours * 60 * 60 * 1000))
  local now_s listing app_id

  now_s=$(date +%s) || die 'cannot read the current time'
  local -r now_ms=$((now_s * 1000))

  listing=$(yarn application -list -appStates running) \
    || die 'yarn application -list failed'

  # Ids are read on fd 3 so commands inside the loop keep their own stdin.
  while IFS= read -r -u 3 app_id; do
    [[ -n "$app_id" ]] || continue
    check_app "$app_id" "$max_hours" "$limit_ms" "$now_ms"
  done 3< <(awk '/application_/ && !/stream/ { print $1 }' <<<"$listing")
}

main "$@"