Webarc:PBS Jobs Scripts for Searching
From Adapt
submit.sh
#!/bin/bash
#PBS -N waqry_exp
#PBS -l walltime=48:00:00
#PBS -l nodes=8
#PBS -m be
#PBS -M scsong@gmail.com
#PBS -S /bin/bash
#
# submit.sh -- PBS master script for the query experiment.
#
# Steps, in order:
#   1. Publish the node list (pbs_nodefile, rpcsvr.list).
#   2. Start runqsvr.sh on every node listed in $PBS_NODEFILE.
#   3. Poll each node until a tsearchsvr process is found.
#   4. Start runqcli.sh once per node, each with its own time window.
#   5. Poll each node until no runqcli.sh process remains.
#   6. Kill tsearchsvr and clear /scratch1 on every node.
#
# Progress is appended to $PROGFILE.

HOST=$(hostname -s)
PROGFILE=/vnodehomes/toaster/prog/master.$HOST
echo "" > "$PROGFILE"

# Current time in the format used by every progress line.
now() {
  date +"%m/%d/%y %H:%M:%S"
}

# Append "<timestamp>: <message>" to the progress file.
log() {
  echo "$(now): $*" >> "$PROGFILE"
}

# Append "<host>(<timestamp>): <message>" to the progress file.
hlog() {
  echo "$HOST($(now)): $*" >> "$PROGFILE"
}

# Node names, one per entry. Word splitting is intentional here: the node
# file holds one host name per line.
# shellcheck disable=SC2207
nodes=( $(cat "${PBS_NODEFILE}") )

#
# Define function stageout
#
# Copies any existing outputs to the results host and clears /scratch1 on
# every node. Only invoked through the (currently disabled) trap below.
function stageout {
  local node
  echo "MASTER($HOST): SIGTERM caught. Stage out. Copying any existing outputs"
  scp -i ~/webarc/id_rsa /scratch1/*.out scsong@naraapp03:/fs/webarc3/data/wikipedia/results/
  scp -i ~/webarc/id_rsa /scratch1/*.prog scsong@naraapp03:/fs/webarc3/data/wikipedia/results/
  for node in "${nodes[@]}"; do
    echo "MASTER($HOST): Staging out $node"
    ssh "$node" "\rm -rf /scratch1/*" &
  done
}

#
# Trap SIGTERM, SIGKILL, SIGINT to invoke function stageout
# (disabled; note that signal 9 / SIGKILL cannot be trapped in any case)
#
#trap 'stageout' 2 9 15

#
# Stage in variable files
#
cat "${PBS_NODEFILE}" > pbs_nodefile
cat "${PBS_NODEFILE}" > ~/webarc/tsearch/bin/rpcsvr.list
totalnodes=$(wc -l < "${PBS_NODEFILE}")

#
# Run runqsvr.sh on all nodes
#
# Every node, including the first one, is started through ssh. ~ is expanded
# locally, so the script path is this user's home directory as seen here.
count=1
for node in "${nodes[@]}"; do
  log "runqsvr.sh $count $totalnodes at $node"
  ssh "$node" ~/runqsvr.sh "$count" "$totalnodes" &
  count=$((count + 1))
done

#
# Monitor deploy status
#
PNAME="tsearchsvr"
for node in "${nodes[@]}"; do
  log "checking tsearchsvr at $node"
  while ! ssh "$node" /sbin/pidof -x "$PNAME" > /dev/null; do
    log "tsearchsvr NOT found at $node.. sleep 30 seconds"
    sleep 30
  done
  log "tsearchsvr FOUND at $node.. check next server"
done
# now every node must have tsearchsvr running

#
# Run runqcli.sh on all nodes
#
# The query time span is pinned to 64; the time window varies per node.
# qts is kept for the alternative experiment (vary the span instead by
# passing ${qts[$count]} as the first argument); it is not used below.
# shellcheck disable=SC2034
qts=( 1 2 4 8 16 32 64 83 )
tw=( 1 2 4 8 16 32 64 83 )

# The message deliberately spans two lines, as in the original log format.
printf '%s: all nodes running tsearchsvr. \nLaunch runqcli.sh\n' "$(now)" >> "$PROGFILE"

count=0
for node in "${nodes[@]}"; do
  # More nodes than time windows would launch a client with an empty window.
  if [ "$count" -ge "${#tw[@]}" ]; then
    log "No time window defined for $node (only ${#tw[@]} available); skipping"
    count=$((count + 1))
    continue
  fi

  # Log exactly the arguments that are passed to runqcli.sh.
  log "Launch runqcli.sh 64 ${tw[$count]} at $node"
  if [ "$count" -eq 0 ]; then
    # First entry runs locally.
    # NOTE(review): assumes the first node in the node file is this host.
    ~/runqcli.sh 64 "${tw[$count]}" &
  else
    ssh "$node" "~/runqcli.sh 64 ${tw[$count]}" &
  fi
  count=$((count + 1))
done

#
# Monitor runqcli.sh
#
PNAME="runqcli.sh"
for node in "${nodes[@]}"; do
  while ssh "$node" /sbin/pidof -x "$PNAME" > /dev/null; do
    hlog "runqcli.sh still running at $node. Sleep 10 mins"
    sleep 600 # sleep 10 minutes
  done
  hlog "no existing runqcli.sh"
done
# now every runqcli.sh must have finished

#
# Stage out
#
cleanup_pids=""
for node in "${nodes[@]}"; do
  echo "MASTER($HOST): killing tsearchsvr at $node" >> "$PROGFILE"
  ssh "$node" "pkill -9 tsearchsvr; \rm -rf /scratch1/*" &
  cleanup_pids="$cleanup_pids $!"
done

# Wait for the cleanup sessions only, so they are not cut short when this
# script exits. Left unquoted on purpose: one word per PID.
if [ -n "$cleanup_pids" ]; then
  # shellcheck disable=SC2086
  wait $cleanup_pids
fi

exit 0
runqsvr.sh
#!/bin/bash
#
# runqsvr.sh -- stage in monthly indexes on this node and run tsearchsvr.
#
# Usage: runqsvr.sh START STRIDE
#   START   first month number handled by this node
#   STRIDE  step between month numbers; months START, START+STRIDE, ...
#           up to 83 are handled
#
# For each month it links the index into /scratch1, writes a ready marker
# and appends an entry to /scratch1/timewindow.conf. It then runs
# /scratch1/tsearchsvr in the foreground on that configuration file.
# Progress goes to /vnodehomes/toaster/prog/svr-<month>.<host>; after the
# loop, the file belonging to the last month keeps receiving the log.

HOST=$(hostname -s)
TWFILE=/scratch1/timewindow.conf
LAST_MONTH=83

# Print usage to stderr and abort.
usage() {
  echo "Usage: $0 START STRIDE   (0 <= START <= $LAST_MONTH, STRIDE >= 1)" >&2
  exit 2
}

# Append "<host>(<timestamp>): <message>" to the current progress file.
log() {
  echo "$HOST($(date +"%m/%d/%y %H:%M:%S")): $*" >> "$PROGFILE"
}

## STAGE OUT ALL DATA ##
# Only invoked through the (currently disabled) trap below.
function stageout {
  HOST=$(hostname -s)
  DATE=$(date +"%m/%d/%y %H:%M:%S")
  echo "$HOST($DATE): SIGTERM Trapped. Staging out"
  pkill -9 tsearchsvr
  \rm -rf /scratch1/*
}

# Disabled; note that signal 9 (SIGKILL) cannot be trapped in any case.
#trap 'stageout' 2 9 15

# Validate arguments: a stride of 0 would loop forever, and a start beyond
# the last month would skip the loop and leave PROGFILE unset.
[ $# -ge 2 ] || usage
for arg in "$1" "$2"; do
  case "$arg" in
    '' | *[!0-9]*) usage ;;
  esac
done
if [ "$2" -lt 1 ] || [ "$1" -gt "$LAST_MONTH" ]; then
  usage
fi

# Start from a clean state: drop leftovers from a previous run.
if [ -f "$TWFILE" ]; then
  rm -f "$TWFILE"
fi
if [ -f /scratch1/index_copy_complete ]; then
  rm -f /scratch1/index_copy_complete
fi

for (( i = $1; i <= LAST_MONTH; i = i + $2 )); do
  d=$(printf "%03d" "$i")
  PROGFILE=/vnodehomes/toaster/prog/svr-$d.$HOST
  if [ -f "$PROGFILE" ]; then
    rm -f "$PROGFILE"
  fi
  log "runqsvr.sh running"

  ## STAGE IN INDEXES ##
  log "Staging in Indexes for month-$d"
  if [ ! -d "/scratch1/month-$d" ]; then
    # The index is linked rather than copied (cp -r was the earlier approach).
    ln -s "/fs/webarc3/data/wikipedia/lemur_index/monthly/month-$d" /scratch1/
  fi
  echo "$HOST($(date +"%m/%d/%y %H:%M:%S")): Index ready" > "/scratch1/index_ready_$d"

  ## ADD ENTRY TO timewindow.conf FILE ##
  log "Adding an entry in timewindow.conf for month-$d"
  echo "$i /scratch1/month-$d TEMP_OKAPI" >> "$TWFILE"
done
log "All indexes ready"

## STAGE IN BINARY ##
log "Staging in tsearchsvr"
# Guarded like the client binary in runqcli.sh so a rerun does not fail.
if [ ! -f /scratch1/tsearchsvr ]; then
  ln -s ~/webarc/tsearch/bin/tsearchsvr /scratch1/
fi

## NOTIFY SERVER PRESENCE TO CLIENT (TODO: needs locking mechanism, perhaps) ##
#echo "$HOSTNAME" >> ~/webarc/tsearch/bin/rpcsvr.list

## RUN QUERY SERVER ##
log "Starting Query Server"
if ! cd /scratch1; then
  log "Cannot cd to /scratch1"
  exit 1
fi
/scratch1/tsearchsvr "$TWFILE" >> "$PROGFILE" 2>&1
# Reaching this line means the server process has terminated.
log "Query Server ENDED????"
runqcli.sh
#!/bin/bash
#
# runqcli.sh -- stage in the query client and its inputs, run it, and copy
# the result file to the results host.
#
# Usage: runqcli.sh QTS TW
#   QTS  value passed to gentimespan.sh; names the qts.<QTS> file
#   TW   value passed to gentimewindows.sh; names the tw.<TW> file
#
# Progress is written to /vnodehomes/toaster/prog/cli-qts<QTS>-tw<TW>.<host>.
# Exits with the status of the final scp.

# Print usage to stderr and abort.
usage() {
  echo "Usage: $0 QTS TW" >&2
  exit 2
}

# Append "<host>(<timestamp>): <message>" to the progress file.
log() {
  echo "$HOST($(date +"%m/%d/%y %H:%M:%S")): $*" >> "$PROGFILE"
}

## STAGE OUT ALL DATA ##
# Only invoked through the (currently disabled) trap below.
function stageout {
  HOST=$(hostname -s)
  DATE=$(date +"%m/%d/%y %H:%M:%S")
  echo "$HOST($DATE): SIGTERM Trapped. Staging out"
  \rm -rf /scratch1/*
}

# Disabled; note that signal 9 (SIGKILL) cannot be trapped in any case.
#trap 'stageout' 2 9 15

# Both arguments are used in file names below; refuse to run without them.
if [ $# -lt 2 ] || [ -z "$1" ] || [ -z "$2" ]; then
  usage
fi

HOST=$(hostname -s)
PROGFILE=/vnodehomes/toaster/prog/cli-qts$1-tw$2.$HOST
if [ -f "$PROGFILE" ]; then
  rm -f "$PROGFILE"
fi
log "runqcli.sh running"

## STAGE IN CLIENT BINARY ##
log "Staging in tsearchcli"
if [ ! -f /scratch1/tsearchcli ]; then
  ln -s ~/webarc/tsearch/bin/tsearchcli /scratch1/
fi

## STAGE IN QUERY FILE ##
log "Staging in query file"
cp ~/webarc/tsearch/bin/aolquery.txt /scratch1/

## STAGE IN RPC SERVER LIST FILE ##
log "Staging in rpc server list"
cp ~/webarc/tsearch/bin/rpcsvr.list /scratch1/

# The generated files and the client's relative arguments live in /scratch1;
# running from anywhere else would scatter them.
if ! cd /scratch1; then
  log "Cannot cd to /scratch1"
  exit 1
fi

## GENERATE QUERY TIME SPANS ##
if [ ! -f "qts.$1" ]; then
  log "Generate time spans $1"
  ~/webarc/tsearch/bin/gentimespan.sh "$1" > "qts.$1"
fi

## GENERATE TIME WINDOWS ##
if [ ! -f "tw.$2" ]; then
  log "Generate time windows $2"
  ~/webarc/tsearch/bin/gentimewindows.sh "$2" > "tw.$2"
fi

## RUN QUERY CLIENT ##
log "Starting Query Client"
/scratch1/tsearchcli /scratch1/rpcsvr.list /scratch1/aolquery.txt "qts.$1" "tw.$2" >> "$PROGFILE" 2>&1
log "Query Client Ended"

## STAGE OUT RESULTS ##
log "Copying results"
scp -i ~/webarc/id_rsa "/scratch1/qts.${1}_tw.$2.out" scsong@naraapp03:/fs/webarc3/data/wikipedia/results/
rc=$?
if [ "$rc" -ne 0 ]; then
  log "Copying results failed (scp exit status $rc)"
fi

exit "$rc"