Personal tools

Webarc:PBS Jobs Scripts for Searching

From Adapt

Revision as of 01:18, 10 November 2009 by Scsong (talk | contribs)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to: navigation, search

submit.sh

#!/bin/bash
#PBS -N waqry_exp
#PBS -l walltime=48:00:00
#PBS -l nodes=8
#PBS -m be
#PBS -M scsong@gmail.com
#PBS -S /bin/bash
#
# submit.sh: master script of the query experiment. It starts runqsvr.sh on
# every node listed in ${PBS_NODEFILE}, waits until tsearchsvr is up on all of
# them, launches runqcli.sh, waits for the clients to finish and cleans up.

# Short host name of the node running this script; it tags the log file name.
HOST=$(hostname -s)

# Progress log of the master. Every run starts with a fresh file that holds
# a single empty line.
PROGFILE="/vnodehomes/toaster/prog/master.${HOST}"
echo "" > "${PROGFILE}"

#
# Define function stageout
#
# Copies any *.out and *.prog files found in /scratch1 on this host to the
# results directory on naraapp03, then wipes /scratch1 on every node listed
# in ${PBS_NODEFILE}.
# Globals:   HOST (read), PBS_NODEFILE (read)
# Outputs:   progress messages on stdout
# Returns:   status of the last remote cleanup that was waited for
#
stageout() {
  local node
  local -a cleanup_pids=()

  echo "MASTER($HOST): SIGTERM caught. Stage out. Copying any existing outputs"
  scp -i ~/webarc/id_rsa /scratch1/*.out scsong@naraapp03:/fs/webarc3/data/wikipedia/results/
  scp -i ~/webarc/id_rsa /scratch1/*.prog scsong@naraapp03:/fs/webarc3/data/wikipedia/results/

  while IFS= read -r node || [[ -n "$node" ]]; do
    [[ -n "$node" ]] || continue
    echo "MASTER($HOST): Staging out $node"
    # -n keeps ssh away from stdin, which is the node list being read here.
    ssh -n "$node" "\rm -rf /scratch1/*" &
    cleanup_pids+=("$!")
  done < "${PBS_NODEFILE}"

  # Wait only for the cleanups started above. A bare `wait` would also block
  # on unrelated background jobs of the calling script.
  if (( ${#cleanup_pids[@]} > 0 )); then
    wait "${cleanup_pids[@]}"
  fi
}

#
# Trap SIGINT (2) and SIGTERM (15) to invoke function stageout. SIGKILL (9)
# cannot be trapped, so listing it has no effect. The trap is currently disabled.
#
#trap 'stageout' 2 9 15

#
# Stage in variable files: one copy of the node list goes to the working
# directory, a second one becomes the rpcsvr.list that runqcli.sh copies to
# its scratch space. totalnodes is the number of lines in the node list.
#
tee pbs_nodefile ~/webarc/tsearch/bin/rpcsvr.list < "${PBS_NODEFILE}" > /dev/null
totalnodes=$(wc -l < "${PBS_NODEFILE}")

#
# Run runqsvr.sh on all nodes. Each instance is handed its 1-based position
# in the node list and the total number of entries; runqsvr.sh uses the two
# values as first month and stride when it picks its indexes.
#
rank=1
for node in $(< "${PBS_NODEFILE}"); do
  DATE=$(date +"%m/%d/%y %H:%M:%S")
  echo "$DATE: runqsvr.sh $rank $totalnodes at $node" >> "$PROGFILE"
  # Every instance is started through ssh in the background. An earlier
  # variant launched the first one directly on this host instead.
  ssh "$node" ~/runqsvr.sh "$rank" "$totalnodes" &
  rank=$((rank + 1))
done

#
# Monitor deploy status: visit the nodes one after another and poll each one
# every 30 seconds until pidof reports a tsearchsvr process there.
#
PNAME="tsearchsvr"
for node in $(< "${PBS_NODEFILE}"); do
  DATE=$(date +"%m/%d/%y %H:%M:%S")
  echo "$DATE: checking tsearchsvr at $node" >> "$PROGFILE"

  until ssh "$node" /sbin/pidof -x "$PNAME" > /dev/null; do
    DATE=$(date +"%m/%d/%y %H:%M:%S")
    echo "$DATE: tsearchsvr NOT found at $node.. sleep 30 seconds" >> "$PROGFILE"
    sleep 30
  done

  DATE=$(date +"%m/%d/%y %H:%M:%S")
  echo "$DATE: tsearchsvr FOUND at $node.. check next server" >> "$PROGFILE"
done
# Reaching this point means tsearchsvr was seen running on every node.

#
# Run runqcli.sh on all nodes
#
# The query time span is held at 64 for every client, while the time-window
# size is taken from tw[] by position in the node list. qts[] belongs to the
# alternative experiment (varying the time span) and is currently unused.
qts=( 1 2 4 8 16 32 64 83 )
tw=( 1 2 4 8 16 32 64 83 )
DATE=$(date +"%m/%d/%y %H:%M:%S")
echo "$DATE: all nodes running tsearchsvr. Launch runqcli.sh" >> "$PROGFILE"
count=0
for node in $(< "${PBS_NODEFILE}"); do
  DATE=$(date +"%m/%d/%y %H:%M:%S")

  # More node entries than configured time windows: launching a client with
  # an empty window argument would only produce garbage, so skip the entry.
  if (( count >= ${#tw[@]} )); then
    echo "$DATE: no time window configured for entry $count ($node). Skipping" >> "$PROGFILE"
    count=$((count + 1))
    continue
  fi

  # Log exactly the arguments that are passed to runqcli.sh below.
  echo "$DATE: Launch runqcli.sh 64 ${tw[$count]} at $node" >> "$PROGFILE"
  if (( count == 0 )); then
    # The first entry is started directly, without ssh.
    # NOTE(review): presumably the first node-file entry is this host - confirm.
    ~/runqcli.sh 64 "${tw[$count]}" &
  else
    ssh "$node" "~/runqcli.sh 64 ${tw[$count]}" &
  fi
  count=$((count + 1))
done

#
# Monitor runqcli.sh: visit the nodes one after another and poll each one
# every 10 minutes for as long as pidof still finds a runqcli.sh process.
#
PNAME="runqcli.sh"
for node in $(< "${PBS_NODEFILE}"); do
  while ssh "$node" /sbin/pidof -x "$PNAME" > /dev/null; do
    DATE=$(date +"%m/%d/%y %H:%M:%S")
    echo "$HOST($DATE): runqcli.sh still running at $node. Sleep 10 mins" >> "$PROGFILE"
    sleep 600 # sleep 10 minutes
  done
  DATE=$(date +"%m/%d/%y %H:%M:%S")
  # Name the script that is actually being monitored, and the node it ran on.
  echo "$HOST($DATE): no existing runqcli.sh at $node" >> "$PROGFILE"
done
# Reaching this point means no runqcli.sh process was found on any node.

#
# Stage out
#
# Kill tsearchsvr and empty /scratch1 on every node in parallel, then wait for
# those cleanups before leaving. Without the wait the script would exit while
# the ssh sessions are still running.
cleanup_pids=()
for node in $(< "${PBS_NODEFILE}"); do
  echo "MASTER($HOST): killing tsearchsvr at $node" >> "$PROGFILE"
  ssh "$node" "pkill -9 tsearchsvr; \rm -rf /scratch1/*" &
  cleanup_pids+=("$!")
done

# Wait only for the cleanup sessions started above.
if (( ${#cleanup_pids[@]} > 0 )); then
  wait "${cleanup_pids[@]}"
fi

# Cleanup is best effort; the job still ends with status 0 as before.
exit 0

runqsvr.sh

#!/bin/bash
#
# runqsvr.sh <first-month> <stride>
# Links the monthly indexes first-month, first-month+stride, ... (up to 83)
# into /scratch1, lists them in timewindow.conf and runs tsearchsvr on them.

# Short host name, used to tag log lines and log file names.
HOST=$(hostname -s)
# Configuration file handed to tsearchsvr; one line per staged month.
TWFILE="/scratch1/timewindow.conf"

## STAGE OUT ALL DATA ##
# Announces the stage-out on stdout, force-kills tsearchsvr and empties
# /scratch1. Refreshes the global HOST and DATE variables as a side effect.
stageout() {
  HOST=$(hostname -s)
  DATE=$(date +"%m/%d/%y %H:%M:%S")
  printf '%s\n' "$HOST($DATE): SIGTERM Trapped. Staging out"
  pkill -9 tsearchsvr
  # The leading backslash bypasses any alias defined for rm.
  \rm -rf /scratch1/*
}
#trap 'stageout' 2 9 15

# Remove files that an earlier run may have left in scratch space: the old
# time-window configuration and the index_copy_complete marker.
for leftover in "$TWFILE" /scratch1/index_copy_complete; do
  if [[ -f "$leftover" ]]; then
    rm -f "$leftover"
  fi
done

## STAGE IN INDEXES AND BUILD timewindow.conf ##
# $1 is the first month handled by this server and $2 the stride between
# months; the loop runs up to month 83. Both are checked up front because an
# empty stride is an arithmetic error and a stride of 0 never terminates.
start_re='^(0|[1-9][0-9]*)$'
stride_re='^[1-9][0-9]*$'
if [[ ! "$1" =~ $start_re || ! "$2" =~ $stride_re ]]; then
  echo "usage: runqsvr.sh <first-month> <stride>  (stride must be >= 1)" >&2
  exit 1
fi

for (( i = $1; i <= 83; i += $2 )); do
  d=$(printf "%03d" "$i")

  # One progress log per month; start it from scratch.
  PROGFILE="/vnodehomes/toaster/prog/svr-$d.$HOST"
  if [[ -f "$PROGFILE" ]]; then
    rm -f "$PROGFILE"
  fi

  DATE=$(date +"%m/%d/%y %H:%M:%S")
  echo "$HOST($DATE): runqsvr.sh running" >> "$PROGFILE"

  ## STAGE IN INDEXES ##
  DATE=$(date +"%m/%d/%y %H:%M:%S")
  echo "$HOST($DATE): Staging in Indexes for month-$d" >> "$PROGFILE"
  if [[ ! -d "/scratch1/month-$d" ]]; then
    # The index is symlinked from the shared file system, not copied.
    ln -s "/fs/webarc3/data/wikipedia/lemur_index/monthly/month-$d" /scratch1/
  fi
  echo "$HOST($DATE): Index ready" > "/scratch1/index_ready_$d"

  ## ADD ENTRY TO timewindow.conf FILE ##
  DATE=$(date +"%m/%d/%y %H:%M:%S")
  echo "$HOST($DATE): Adding an entry in timewindow.conf for month-$d" >> "$PROGFILE"
  echo "$i /scratch1/month-$d TEMP_OKAPI" >> "$TWFILE"
done

# Stamp the summary line with the current time, not with the time of the
# last loop iteration. It goes to the log of the last month processed.
DATE=$(date +"%m/%d/%y %H:%M:%S")
echo "$HOST($DATE): All indexes ready" >> "$PROGFILE"

## STAGE IN BINARY ##
DATE=$(date +"%m/%d/%y %H:%M:%S")
echo "$HOST($DATE): Staging in tsearchsvr" >> "$PROGFILE"
# Link the server binary only when it is not there yet, the same way
# runqcli.sh stages tsearchcli. This avoids an ln error on a leftover link.
if [[ ! -f /scratch1/tsearchsvr ]]; then
  ln -s ~/webarc/tsearch/bin/tsearchsvr /scratch1/
fi


## NOTIFY SERVER PRESENCE TO CLIENT (TODO: needs locking mechanism, perhaps) ##
#echo "$HOSTNAME" >> ~/webarc/tsearch/bin/rpcsvr.list

## RUN QUERY SERVER ##
DATE=$(date +"%m/%d/%y %H:%M:%S")
echo "$HOST($DATE): Starting Query Server" >> "$PROGFILE"
if ! cd /scratch1; then
  echo "$HOST($DATE): cannot cd to /scratch1" >> "$PROGFILE"
  exit 1
fi
# Runs in the foreground; the lines below execute only after the server exits.
/scratch1/tsearchsvr "$TWFILE" >> "$PROGFILE" 2>&1
# Take a fresh timestamp so the log shows when the server stopped, not when
# it was started.
DATE=$(date +"%m/%d/%y %H:%M:%S")
echo "$HOST($DATE): Query Server ENDED????" >> "$PROGFILE"


#while [ 1 ]; do
#   sleep 60
#done

runqcli.sh

#!/bin/bash

## STAGE OUT ALL DATA ##
# Announces the stage-out on stdout and empties /scratch1. Refreshes the
# global HOST and DATE variables as a side effect.
stageout() {
  HOST=$(hostname -s)
  DATE=$(date +"%m/%d/%y %H:%M:%S")
  printf '%s\n' "$HOST($DATE): SIGTERM Trapped. Staging out"
  # The leading backslash bypasses any alias defined for rm.
  \rm -rf /scratch1/*
}
#trap 'stageout' 2 9 15

# $1 is the query time span and $2 the time-window size of this client; both
# become part of the progress-log name.
HOST=$(hostname -s)
PROGFILE="/vnodehomes/toaster/prog/cli-qts${1}-tw${2}.${HOST}"

# Start the progress log from scratch.
[[ ! -f "$PROGFILE" ]] || rm -f "$PROGFILE"

DATE=$(date +"%m/%d/%y %H:%M:%S")
echo "$HOST($DATE): runqcli.sh running" >> "$PROGFILE"

## STAGE IN CLIENT BINARY ##
# The client binary is symlinked into scratch space unless it is already there.
DATE=$(date +"%m/%d/%y %H:%M:%S")
echo "$HOST($DATE): Staging in tsearchcli" >> "$PROGFILE"
[[ -f /scratch1/tsearchcli ]] || ln -s ~/webarc/tsearch/bin/tsearchcli /scratch1/

## STAGE IN QUERY FILE ##
# Unlike the binary, the query file is copied on every run.
DATE=$(date +"%m/%d/%y %H:%M:%S")
echo "$HOST($DATE): Staging in query file" >> "$PROGFILE"
cp ~/webarc/tsearch/bin/aolquery.txt /scratch1/

## STAGE IN RPC SERVER LIST FILE ##
# The server list is copied on every run as well.
DATE=$(date +"%m/%d/%y %H:%M:%S")
echo "$HOST($DATE): Staging in rpc server list" >> "$PROGFILE"
cp ~/webarc/tsearch/bin/rpcsvr.list /scratch1/

## GENERATE QUERY TIME SPANS ##
# Everything below works with file names relative to /scratch1, so stop when
# the directory cannot be entered instead of writing into the wrong place.
if ! cd /scratch1; then
  DATE=$(date +"%m/%d/%y %H:%M:%S")
  echo "$HOST($DATE): cannot cd to /scratch1" >> "$PROGFILE"
  exit 1
fi
# qts.$1 is generated only when it does not exist yet.
if [[ ! -f "qts.$1" ]]; then
  # Fresh timestamp, as in the time-window branch below.
  DATE=$(date +"%m/%d/%y %H:%M:%S")
  echo "$HOST($DATE): Generate time spans $1" >> "$PROGFILE"
  ~/webarc/tsearch/bin/gentimespan.sh "$1" > "qts.$1"
fi

## GENERATE TIME  WINDOWS ##
# tw.$2 is generated only when it does not exist yet.
if [[ ! -f "tw.$2" ]]; then
  DATE=$(date +"%m/%d/%y %H:%M:%S")
  echo "$HOST($DATE): Generate time windows $2" >> "$PROGFILE"
  ~/webarc/tsearch/bin/gentimewindows.sh "$2" > "tw.$2"
fi

## RUN QUERY CLIENT ##
# The client gets the server list, the query file and the span/window files
# (relative to the current directory). Its stdout and stderr are appended to
# the progress log.
DATE=$(date +"%m/%d/%y %H:%M:%S")
echo "$HOST($DATE): Starting Query Client" >> "$PROGFILE"
/scratch1/tsearchcli /scratch1/rpcsvr.list /scratch1/aolquery.txt "qts.${1}" "tw.${2}" >> "$PROGFILE" 2>&1
DATE=$(date +"%m/%d/%y %H:%M:%S")
echo "$HOST($DATE): Query Client Ended" >> "$PROGFILE"

## STAGE OUT RESULTS ##
# Result file name: qts.<span>_tw.<window>.out
DATE=$(date +"%m/%d/%y %H:%M:%S")
echo "$HOST($DATE): Copying results" >> "$PROGFILE"
scp -i ~/webarc/id_rsa "/scratch1/qts.${1}_tw.${2}.out" scsong@naraapp03:/fs/webarc3/data/wikipedia/results/

# The script's exit status is that of the scp above.
exit