Webarc:PBS Jobs Scripts for Searching
From Adapt
submit.sh
#!/bin/bash
#PBS -N waqry_exp
#PBS -l walltime=48:00:00
#PBS -l nodes=8
#PBS -m be
#PBS -M scsong@gmail.com
#PBS -S /bin/bash
# The #PBS lines above are scheduler directives and must stay ahead of the
# first command. NOTE(review): per the usual qsub options they set the job
# name, a 48 hour walltime, 8 nodes, mail at begin/end and the job shell.

# Short name of the host this master script runs on.
HOST=$(hostname -s)
# Progress log of the master; the later stages of this script append to it.
PROGFILE="/vnodehomes/toaster/prog/master.${HOST}"
# Start the log over; it initially holds a single empty line.
printf '\n' > $PROGFILE
#
# stageout
#   Copies any *.out and *.prog files found in /scratch1 to the results
#   directory on naraapp03, then starts one background ssh per entry of
#   ${PBS_NODEFILE} that empties /scratch1 on that node.
#   Globals read: HOST, PBS_NODEFILE.
#   The background ssh sessions are not waited for.
#
stageout() {
  echo "MASTER($HOST): SIGTERM caught. Stage out. Copying any existing outputs"

  # Same scp invocation for both kinds of result file.
  local suffix
  for suffix in out prog; do
    scp -i ~/webarc/id_rsa /scratch1/*.$suffix scsong@naraapp03:/fs/webarc3/data/wikipedia/results/
  done

  for node in $(cat ${PBS_NODEFILE}); do
    echo "MASTER($HOST): Staging out $node"
    # \rm is expanded by the remote shell; the backslash skips any rm alias.
    ssh $node "\rm -rf /scratch1/*" &
  done
}
#
# Signal handling (disabled). The trap below would run stageout on signals
# 2 (SIGINT), 9 (SIGKILL) and 15 (SIGTERM); note that SIGKILL can never be
# caught, so listing 9 has no effect.
#
#trap 'stageout' 2 9 15

#
# Stage in variable files: keep a copy of the node list in the current
# directory and publish it as rpcsvr.list, which runqcli.sh copies to
# /scratch1 and hands to tsearchcli.
#
cat ${PBS_NODEFILE} > pbs_nodefile
cat ${PBS_NODEFILE} > ~/webarc/tsearch/bin/rpcsvr.list
totalnodes=$(cat ${PBS_NODEFILE} | wc -l)

#
# Start runqsvr.sh on every node, in the background. Each instance receives
# its 1-based position in the node list and the total number of entries.
# (A disabled variant ran the first instance locally instead of through ssh.)
#
count=1
for node in $(cat ${PBS_NODEFILE}); do
  DATE=$(date +"%m/%d/%y %H:%M:%S")
  echo "$DATE: runqsvr.sh $count $totalnodes at $node" >> $PROGFILE
  ssh $node ~/runqsvr.sh $count $totalnodes &
  count=$((count + 1))
done
#
# Monitor deploy status: visit the nodes one after another and poll each
# every 30 seconds until pidof reports a tsearchsvr process there.
#
PNAME="tsearchsvr"
for node in $(cat ${PBS_NODEFILE}); do
  DATE=$(date +"%m/%d/%y %H:%M:%S")
  echo "$DATE: checking tsearchsvr at $node" >> $PROGFILE

  # pidof exits non-zero while no matching process exists on the node.
  until ssh $node /sbin/pidof -x $PNAME > /dev/null; do
    DATE=$(date +"%m/%d/%y %H:%M:%S")
    echo "$DATE: tsearchsvr NOT found at $node.. sleep 30 seconds" >> $PROGFILE
    sleep 30
  done

  DATE=$(date +"%m/%d/%y %H:%M:%S")
  echo "$DATE: tsearchsvr FOUND at $node.. check next server" >> $PROGFILE
done # from here on every node has tsearchsvr running
#
# Run runqcli.sh on all nodes
#
# Every client is started with a fixed first argument of 64 (the query time
# span setting of runqcli.sh); the second argument (time window setting) is
# taken from tw[] by the node's position in the node list.
# qts[] belongs to a disabled variant that varied the time span instead; it
# is kept for reference and nothing below reads it.
#
qts=( 1 2 4 8 16 32 64 83 )
tw=( 1 2 4 8 16 32 64 83 )
DATE=$(date +"%m/%d/%y %H:%M:%S")
echo "$DATE: all nodes running tsearchsvr. Launch runqcli.sh" >> $PROGFILE
count=0
for node in $(cat ${PBS_NODEFILE}); do
  DATE=$(date +"%m/%d/%y %H:%M:%S")

  # tw[] has one entry per expected node. Skip surplus node-list entries
  # rather than starting a client with an empty time window argument.
  if [ -z "${tw[$count]}" ]; then
    echo "$DATE: no time window configured for entry $count ($node); skipping" >> $PROGFILE
    count=$((count + 1))
    continue
  fi

  # Log exactly the arguments used below. The previous message printed
  # ${qts[$count]} and a bare $tw (which is always element 0 of the array),
  # so the log never matched the command that was really started.
  echo "$DATE: Launch runqcli.sh 64 ${tw[$count]} at $node" >> $PROGFILE

  if [ $count -eq 0 ]; then
    # First entry runs locally.
    # NOTE(review): assumes the first node-list entry is this host - confirm.
    ~/runqcli.sh 64 ${tw[$count]} &
  else
    ssh $node "~/runqcli.sh 64 ${tw[$count]}" &
  fi
  count=$((count + 1))
done
#
# Monitor runqcli.sh: visit the nodes one after another and poll each every
# 10 minutes until pidof no longer reports a runqcli.sh process there.
#
PNAME="runqcli.sh"
for node in $(cat ${PBS_NODEFILE}); do
  while ssh $node /sbin/pidof -x $PNAME > /dev/null; do
    DATE=$(date +"%m/%d/%y %H:%M:%S")
    echo "$HOST($DATE): $PNAME still running at $node. Sleep 10 mins" >> $PROGFILE
    sleep 600 # sleep 10 minutes
  done
  DATE=$(date +"%m/%d/%y %H:%M:%S")
  # The message used to name runindex.sh, a script this loop never watches;
  # report the process that was actually checked, and where.
  echo "$HOST($DATE): no existing $PNAME at $node" >> $PROGFILE
done # from here on every runqcli.sh has finished
#
# Stage out: kill tsearchsvr and empty /scratch1 on every node.
# (This heading used to be a bare "Stage out" line, which the shell tried to
# run as a command.)
#
cleanup_pids=""
for node in $(cat ${PBS_NODEFILE}); do
  echo "MASTER($HOST): killing tsearchsvr at $node" >> $PROGFILE
  ssh $node "pkill -9 tsearchsvr; \rm -rf /scratch1/*" &
  cleanup_pids="$cleanup_pids $!"
done

# Wait for the cleanup sessions before leaving; exiting right away could end
# the job while the remote kill/remove commands are still in flight.
# Only these PIDs are waited for, not every background job of the script.
if [ -n "$cleanup_pids" ]; then
  # Unquoted on purpose: the list has to split into one PID per word.
  wait $cleanup_pids
fi

# Keep the success exit status the script had before the wait was added.
exit 0
runqsvr.sh
#!/bin/bash
# Short host name; tags the log lines and is part of the progress file names.
HOST=$(hostname -s)
# Time window configuration file that is later passed to tsearchsvr.
TWFILE=/scratch1/timewindow.conf

## STAGE OUT ALL DATA ##
# Announces the stage out on stdout, kills tsearchsvr and empties /scratch1.
# Sets the globals HOST and DATE. Only a commented-out trap refers to it.
stageout() {
  HOST=$(hostname -s)
  DATE=$(date +"%m/%d/%y %H:%M:%S")
  printf '%s\n' "$HOST($DATE): SIGTERM Trapped. Staging out"
  pkill -9 tsearchsvr
  # The backslash bypasses any rm alias.
  \rm -rf /scratch1/*
}
#trap 'stageout' 2 9 15
#
# Main body of runqsvr.sh
#
# Usage: runqsvr.sh <start> <step>
#   $1  first month number handled by this server
#   $2  increment between handled months; the loop visits $1, $1+$2, ...
#       for as long as the value is <= 83
#
# For every handled month the index directory is linked into /scratch1 and
# one line is appended to $TWFILE. Afterwards tsearchsvr is started with that
# file and its output is appended to the progress file of the last month.
#

# Refuse missing or non-numeric arguments and a zero step: an empty step
# breaks the arithmetic of the for loop and a step of 0 never terminates.
for arg in "$1" "$2"; do
  case "$arg" in
    '' | *[!0-9]*)
      echo "usage: runqsvr.sh <start-month> <step>" >&2
      exit 1
      ;;
  esac
done
if [ "$2" -eq 0 ]; then
  echo "runqsvr.sh: step must be greater than 0" >&2
  exit 1
fi

# Remove leftovers so that the configuration is rebuilt from scratch.
rm -f "$TWFILE"
rm -f /scratch1/index_copy_complete

for (( i=$1; i<=83; i=i+$2 )); do
  d=$(printf "%03d" $i)
  PROGFILE=/vnodehomes/toaster/prog/svr-$d.$HOST
  rm -f "$PROGFILE"
  DATE=$(date +"%m/%d/%y %H:%M:%S")
  echo "$HOST($DATE): runqsvr.sh running" >> $PROGFILE

  ## STAGE IN INDEXES ##
  DATE=$(date +"%m/%d/%y %H:%M:%S")
  echo "$HOST($DATE): Staging in Indexes for month-$d" >> $PROGFILE
  if [ ! -d /scratch1/month-$d ]; then
    # The index is linked rather than copied:
    #cp -r /fs/webarc3/data/wikipedia/lemur_index/monthly/month-$d /scratch1/
    ln -s /fs/webarc3/data/wikipedia/lemur_index/monthly/month-$d /scratch1/
  fi
  echo "$HOST($DATE): Index ready" > /scratch1/index_ready_$d

  ## ADD ENTRY TO timewindow.conf FILE ##
  DATE=$(date +"%m/%d/%y %H:%M:%S")
  echo "$HOST($DATE): Adding an entry in timewindow.conf for month-$d" >> $PROGFILE
  echo "$i /scratch1/month-$d TEMP_OKAPI" >> $TWFILE
done
DATE=$(date +"%m/%d/%y %H:%M:%S")
echo "$HOST($DATE): All indexes ready" >> $PROGFILE

## STAGE IN BINARY ##
DATE=$(date +"%m/%d/%y %H:%M:%S")
echo "$HOST($DATE): Staging in tsearchsvr" >> $PROGFILE
# Skip the link when it is already there (as runqcli.sh does for its binary);
# a plain ln -s reports an error on a leftover link.
if [ ! -e /scratch1/tsearchsvr ]; then
  ln -s ~/webarc/tsearch/bin/tsearchsvr /scratch1/
fi

## NOTIFY SERVER PRESENCE TO CLIENT (TODO: needs locking mechanism, perhaps) ##
#echo "$HOSTNAME" >> ~/webarc/tsearch/bin/rpcsvr.list

## RUN QUERY SERVER ##
DATE=$(date +"%m/%d/%y %H:%M:%S")
echo "$HOST($DATE): Starting Query Server" >> $PROGFILE
if ! cd /scratch1; then
  echo "$HOST($DATE): cannot cd to /scratch1" >> $PROGFILE
  exit 1
fi
/scratch1/tsearchsvr $TWFILE >> $PROGFILE 2>&1

# Take a new timestamp: the server may have run for a long time, and the old
# value would stamp its end with the time it was started.
DATE=$(date +"%m/%d/%y %H:%M:%S")
echo "$HOST($DATE): Query Server ENDED????" >> $PROGFILE
#while [ 1 ]; do
# sleep 60
#done
runqcli.sh
#!/bin/bash

## STAGE OUT ALL DATA ##
# Announces the stage out on stdout and empties /scratch1.
# Sets the globals HOST and DATE. Only a commented-out trap refers to it.
stageout() {
  HOST=$(hostname -s)
  DATE=$(date +"%m/%d/%y %H:%M:%S")
  printf '%s\n' "$HOST($DATE): SIGTERM Trapped. Staging out"
  # The backslash bypasses any rm alias.
  \rm -rf /scratch1/*
}
#trap 'stageout' 2 9 15
#
# Main body of runqcli.sh
#
#   $1  query time span setting, handed to gentimespan.sh
#   $2  time window setting, handed to gentimewindows.sh
#
# Stages the client binary, the query file and the rpc server list into
# /scratch1, generates the qts.$1 / tw.$2 inputs when they are missing, runs
# tsearchcli and finally copies the result file to naraapp03.
#
HOST=$(hostname -s)
PROGFILE=/vnodehomes/toaster/prog/cli-qts$1-tw$2.$HOST
[ -f $PROGFILE ] && rm -f $PROGFILE

# Takes a fresh timestamp into DATE and appends "<host>(<date>): <message>"
# to the progress file.
progress() {
  DATE=$(date +"%m/%d/%y %H:%M:%S")
  echo "$HOST($DATE): $1" >> $PROGFILE
}

progress "runqcli.sh running"

## STAGE IN CLIENT BINARY ##
progress "Staging in tsearchcli"
[ -f /scratch1/tsearchcli ] || ln -s ~/webarc/tsearch/bin/tsearchcli /scratch1/

## STAGE IN QUERY FILE ##
progress "Staging in query file"
cp ~/webarc/tsearch/bin/aolquery.txt /scratch1/

## STAGE IN RPC SERVER LIST FILE ##
progress "Staging in rpc server list"
cp ~/webarc/tsearch/bin/rpcsvr.list /scratch1/

## GENERATE QUERY TIME SPANS ##
cd /scratch1
if [ ! -f qts.$1 ]; then
  # This line carries the timestamp of the previous log entry.
  echo "$HOST($DATE): Generate time spans $1" >> $PROGFILE
  ~/webarc/tsearch/bin/gentimespan.sh $1 > qts.$1
fi

## GENERATE TIME WINDOWS ##
if [ ! -f tw.$2 ]; then
  progress "Generate time windows $2"
  ~/webarc/tsearch/bin/gentimewindows.sh $2 > tw.$2
fi

## RUN QUERY CLIENT ##
progress "Starting Query Client"
/scratch1/tsearchcli /scratch1/rpcsvr.list /scratch1/aolquery.txt qts.$1 tw.$2 >> $PROGFILE 2>&1
progress "Query Client Ended"

## STAGE OUT RESULTS ##
progress "Copying results"
scp -i ~/webarc/id_rsa /scratch1/qts.${1}_tw.${2}.out scsong@naraapp03:/fs/webarc3/data/wikipedia/results/
# A bare exit hands the status of the scp above to the caller.
exit