Return-Path: carlos@mci.net
Received: from postoffice.reston.mci.net (postoffice.Reston.mci.net [204.70.128.20]) by leibniz.math.psu.edu (8.6.12/8.6.9) with ESMTP id QAA00877 for <barr@math.psu.edu>; Tue, 30 Jan 1996 16:51:53 -0500
Received: from Flame-War.PentagonCity.mci.net (flame-war.pcy.mci.net [204.70.138.50]) by postoffice.reston.mci.net (8.6.12/8.6.6) with ESMTP id QAA12926 for <barr@math.psu.edu>; Tue, 30 Jan 1996 16:51:17 -0500
Received: (from carlos@localhost) by Flame-War.PentagonCity.mci.net (8.7.3/8.6.12) id QAA05776 for barr@math.psu.edu; Tue, 30 Jan 1996 16:49:01 -0500 (EST)
Date: Tue, 30 Jan 1996 16:49:01 -0500 (EST)
From: Carlos Castro <carlos@mci.net>
Message-Id: <199601302149.QAA05776@Flame-War.PentagonCity.mci.net>
To: barr@math.psu.edu
Subject: checkup.sh shell script
Return-Receipt-To: carlos@mci.net
X-Sun-Charset: US-ASCII

Dave, 

  This is a little shell-script that I have used here at MCI to save
my butt many times....  You might want to put it in your ftp site.  

Carlos

#!/bin/sh
#
# CHECKUP.SH
#
# Carlos Castro 950925
#
# This is a script that should be run every 15 minutes or so from
# cron.  It checks innd through the use of "ctlinnd mode".  If innd
# is dead, it will restart it; if it is throttled for a reason that
# it should not be, it will restart it; and if it sees news.daily
# taking a bit long to finish, it will alert you.  You can add reasons
# to $THROTTLEGOS if you want it to restart when it is throttled for
# that reason (e.g. file descriptors).  You can also add reasons to
# $THROTTLEIGNORES, which are the reasons you give ctlinnd when you
# throttle innd by hand (e.g. when adding newsgroups to active; I use
# "cuz" when I throttle it for pretty much anything).  When you use this
# script, make sure that $PATH is correct for your system, as well as
# the path for innshellvars (that is where it gets a lot of the
# information for your specific INN).

## You might also want to change this line:
##  =()<. @<_PATH_SHELLVARS>@>()=
## innshellvars is expected to supply the INN-specific variables used
## below and later in the script (NEWSBIN, LOCKS, SPOOL, NEWSMASTER, ...).
. /opt/news/etc/innshellvars

##  You might want to change the path here.
PATH=$NEWSBIN:$NEWSBIN/../etc:/usr/ucb:/bin:/usr/bin:/etc
export PATH

## Host name, included in the alert mail body and subject.
HOST=`hostname`
## Lock file whose presence means news.daily is currently running.
DAILY_LOCK=${LOCKS}/LOCK.news.daily
## Flags and full command line used when news.daily is run on the spot.
DAILY_FLAGS="nologs delayrm expdir=/tmp"
CORE=${SPOOL}/core
NEWS_DAILY="${NEWSBIN}/news.daily ${DAILY_FLAGS}"
MAILTO=${NEWSMASTER}
## Timestamp used as a suffix for the files this run creates.
## %d (zero-padded day) is used rather than %e (blank-padded day) so the
## suffix never contains a space on days 1-9 of the month.
EXT=`date +%y%m%d%H%M%S`
## Alert message file; its existence later means "something was logged".
MESS=/tmp/message.$EXT
## First line of "ctlinnd mode" output (stderr included), split into the
## first word, the second word, and the text after "Server throttled ".
MODE="`ctlinnd mode 2>&1 | head -1`"
SERVER=`echo $MODE | awk '{print $1}'`
STATUS=`echo $MODE | awk '{print $2}'`
REASON=`echo $MODE  | sed 's/Server throttled //'`
## From here on, split words on newlines only, so that the multi-line
## reason lists can be walked one full line at a time with "for".
IFS='
'
## Outputs of "ctlinnd mode" that indicate a dead server, one per line.
## The first line of the mode output ($MODE) is compared against each
## entry in full; a match triggers a shutdown/kill/restart of innd.
DEADSERVERS="Can't send \"mode\" command (dead server failure) No such process.
No innd.pid file; did server die?
Can't send \"mode\" command (sendto failure) No such file or directory."

## These are the reasons I give when I throttle innd by hand (e.g. to
## change the active file), one per line.  A throttle for one of these
## reasons is tolerated for a few runs before an alert is sent.
THROTTLEIGNORES="cuz
Cuz
Ack!"

## Reasons for which innd should not be throttled for very long, one per
## line.  A throttle for one of these reasons is undone with "ctlinnd go".
THROTTLEGOS="Error 0 writing history database file -- throttling
File exists writing symlinking article file -- throttling
Not a directory writing article file -- throttling
Too many open files writing articles -- throttling"

## These are reasons to run news.daily on the spot, one per line.
THROTTLEDAILYS="No space left on device writing article file -- throttling
No space left on device writing logging site file -- throttling"

# log_news MESSAGE
#
# Record a problem so it can be mailed and investigated later.
#   - (Re)creates $MESS containing MESSAGE followed by $HOST.
#   - Appends MESSAGE to ${MOST_LOGS}/trouble.log.
#   - Saves the first eight whitespace-separated fields of the last 50
#     lines of $LOG to ${MOST_LOGS}/last_articles.${EXT}.
#
# Globals read: MESS, HOST, MOST_LOGS, LOG, EXT.
#
# Every expansion is quoted so the message is written literally: some of
# the messages passed in contain glob characters (e.g. "did server die?"),
# and the file names may contain characters the shell would otherwise
# split on.
log_news () {
	printf '%s\n' "$1" > "$MESS"
	printf '%s\n' "$HOST" >> "$MESS"
	printf '%s\n' "$1" >> "${MOST_LOGS}/trouble.log"
	tail -n 50 "$LOG" |
		awk '{ print $1, $2, $3, $4, $5, $6, $7, $8 }' \
			> "${MOST_LOGS}/last_articles.${EXT}"
}


# line_count FILE
# Print the number of lines in FILE as a bare integer.  "wc -l FILE" is
# not usable in a numeric test because it prints the file name after the
# count, which makes "[ ... -le N ]" fail with a non-integer operand.
line_count () {
	awk 'END { print NR }' "$1"
}

# The state files (.running.daily, .paused, .throttleignore) are kept in
# $NEWSBIN; refuse to continue if we cannot get there, otherwise they
# would be created in whatever directory cron started us in.
cd "${NEWSBIN:?NEWSBIN is not set}" || exit 1

##
## news.daily is running: each run appends one line to .running.daily.
## Stay quiet for the first 16 runs, then complain.
##

if [ -f "$DAILY_LOCK" ]; then
	date >> .running.daily

	if [ "`line_count .running.daily`" -le 16 ]; then
		exit
	fi

	log_news "`date` running daily for too long"

	# Force the "server is not up" branch below so that no
	# running/paused/throttled handling is done for this run.
	SERVER="Running Daily"
fi

##
## This is what happens when Server is up:
##

if [ "$SERVER" = Server ]; then
	case "$STATUS" in
	running)
		# All is well: forget any grace-period counters.
		rm -f .paused .running.daily .throttleignore
		exit
		;;

	paused)
		# Tolerate a pause for 3 runs before alerting.
		date >> .paused
		if [ "`line_count .paused`" -le 3 ]; then
			exit
		fi

		log_news "`date` paused too long: $MODE"
		;;

	throttled)
		# The "for" loops below rely on IFS being a newline (set
		# earlier in the script) so each list entry is a whole line.

		# Throttled by hand: tolerate it for 3 runs before alerting.
		for THROTTLEIGNORE in $THROTTLEIGNORES
		do
			if [ "$REASON" = "$THROTTLEIGNORE" ]; then
				date >> .throttleignore
				if [ "`line_count .throttleignore`" -le 3 ]; then
					exit
				fi
				log_news "`date` throttled too long"
			fi
		done

		# Throttled for a reason that should not last: unthrottle.
		# "ctlinnd go" is given the same reason text.
		for THROTTLEGO in $THROTTLEGOS
		do
			if [ "$REASON" = "$THROTTLEGO" ]; then
				ctlinnd go "${THROTTLEGO}"
				log_news "`date` unthrottled cuz of $MODE"
				break
			fi
		done

		# Throttled for a reason that calls for news.daily to be
		# run right away: start it in the background as user news.
		for THROTTLEDAILY in $THROTTLEDAILYS
		do
			if [ "$REASON" = "$THROTTLEDAILY" ]; then
				su news -c "$NEWS_DAILY" &
				log_news "`date` running daily cuz of $MODE"
				break
			fi
		done

		# Nothing above logged anything: report the unknown reason.
		if [ ! -f "$MESS" ]; then
			log_news "`date` UNKNOWN THROTTLE: $MODE"
		fi
		;;
	esac
else
##
## Here we test to see if innd is dead
##
	for DEADSERVER in $DEADSERVERS		#Start DEADSERVER for loop
	do
		if [ "$MODE" = "$DEADSERVER" ]; then

			# Make sure two innds are not running: ask nicely,
			# then kill, then kill -9, with 30s between steps.
			ctlinnd shutdown cuz
			sleep 30
			# ${INND} is left unquoted on purpose: if it is empty,
			# grep fails for lack of a pattern instead of matching
			# (and killing) every process in the listing.
			ps ax | grep ${INND} | grep -v grep | \
				awk '{print "kill "$1}' | sh
			sleep 30
			ps ax | grep ${INND} | grep -v grep | \
				awk '{print "kill -9 "$1}' | sh

			# Keep a compressed copy of any core file in $NEWSBIN.
			if [ -f "${CORE}" ]; then
				cp "$CORE" "/tmp/core.$EXT"
				$COMPRESS "/tmp/core.$EXT"
				mv "/tmp/core.${EXT}${Z}" "$NEWSBIN"
			fi

			rc.news
			log_news "`date` restarted cuz of $MODE"
			break
		fi
	done					#End DEADSERVER for loop
fi

# Mail whatever log_news recorded.  Reaching this point without a message
# file means no known condition matched, so flag the script itself.
if [ -f "$MESS" ]; then
	mail -s "$HOST problem" "$MAILTO" < "$MESS"
	rm -f "$MESS"
else
	mail -s "broken script" "$MAILTO" < /dev/null
fi

#END

#END
