<?xml version="1.0" encoding="UTF-8"?>
<!-- Atom revision-history feed for the wiki page "Webarc:PBS Jobs Scripts for Indexing".
     The encoding is declared explicitly; the document content is plain ASCII, which is valid UTF-8. -->
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<!-- Feed identifier: the same URL that the rel="self" link below points to. -->
	<id>https://wiki.umiacs.umd.edu/adapt/index.php?action=history&amp;feed=atom&amp;title=Webarc%3APBS_Jobs_Scripts_for_Indexing</id>
	<title>Webarc:PBS Jobs Scripts for Indexing - Revision history</title>
	<!-- rel="self" is this feed document; rel="alternate" is the HTML history page for the same wiki page. -->
	<link rel="self" type="application/atom+xml" href="https://wiki.umiacs.umd.edu/adapt/index.php?action=history&amp;feed=atom&amp;title=Webarc%3APBS_Jobs_Scripts_for_Indexing"/>
	<link rel="alternate" type="text/html" href="https://wiki.umiacs.umd.edu/adapt/index.php?title=Webarc:PBS_Jobs_Scripts_for_Indexing&amp;action=history"/>
	<!-- Feed-level timestamp; the individual revision carries its own updated value. -->
	<updated>2026-04-07T13:30:15Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.43.7</generator>
	<entry>
		<!-- One revision of the page: diff id 2516 against the previous revision, authored by Scsong. -->
		<id>https://wiki.umiacs.umd.edu/adapt/index.php?title=Webarc:PBS_Jobs_Scripts_for_Indexing&amp;diff=2516&amp;oldid=prev</id>
		<title>Scsong at 01:16, 10 November 2009</title>
		<link rel="alternate" type="text/html" href="https://wiki.umiacs.umd.edu/adapt/index.php?title=Webarc:PBS_Jobs_Scripts_for_Indexing&amp;diff=2516&amp;oldid=prev"/>
		<updated>2009-11-10T01:16:31Z</updated>
		<!--
			Summary payload (type="html"): the escaped HTML body of this revision, marked
			"New page", containing two shell scripts named submit.sh and runindex.sh.
			HTML markup such as the br tags is escaped once; characters that belong to the
			wiki text itself (pre tags, quotes, ampersands, angle brackets) are escaped twice
			so that the HTML shows them literally. That layering appears intentional, so do
			not unescape it.
			The text is whitespace-sensitive and is a record of the 2009 revision; keep it verbatim.

			NOTE(review), for anyone reusing the embedded scripts:
			  * submit.sh traps signals 2, 9 and 15; signal 9 (SIGKILL) cannot be caught by a trap.
			  * submit.sh echoes $DATE in the deploy loop, but the main flow only assigns DATE
			    later (in the monitor loop) or inside stageout, so it is presumably empty there.
			  * runindex.sh calls IndriBuildIndex with the stderr duplication written before the
			    append redirection to $PROGFILE, so stderr presumably does not land in the
			    progress file; confirm the intent.
			  * submit.sh uses the /bin/bash shebang while runindex.sh uses /usr/bin/bash.
			  * The payload exposes a personal e-mail address and the path of a private key file.
		-->

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;submit.sh&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
#PBS -N waexp&lt;br /&gt;
#PBS -l walltime=48:00:00&lt;br /&gt;
#PBS -l nodes=5&lt;br /&gt;
#PBS -m be&lt;br /&gt;
#PBS -M scsong@gmail.com&lt;br /&gt;
#PBS -S /bin/bash&lt;br /&gt;
#PBS -V&lt;br /&gt;
&lt;br /&gt;
HOST=`hostname -s`&lt;br /&gt;
PROGFILE=/vnodehomes/toaster/webarc/submit.prog&lt;br /&gt;
FIRSTINDEXNO=58&lt;br /&gt;
&lt;br /&gt;
#&lt;br /&gt;
# Define function stageout&lt;br /&gt;
#&lt;br /&gt;
function stageout {&lt;br /&gt;
	DATE=`date +&amp;quot;%m/%d/%y %H:%M:%S&amp;quot;`&lt;br /&gt;
	echo &amp;quot;$HOST($DATE): SIGTERM caught. Stage out. Copying any existing outputs&amp;quot;&lt;br /&gt;
	for node in `cat ${PBS_NODEFILE}`; do&lt;br /&gt;
		echo &amp;quot;$HOST($DATE): Staging out $node&amp;quot;&lt;br /&gt;
		ssh $node &amp;quot;\rm -rf /scratch1/*&amp;quot; &amp;amp;&lt;br /&gt;
	done&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
#&lt;br /&gt;
# Trap SIGTERM, SIGKILL, SIGINT to invoke function stageout&lt;br /&gt;
#&lt;br /&gt;
trap &amp;#039;stageout&amp;#039; 2 9 15&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
totalnodes=`cat ${PBS_NODEFILE} | wc -l`&lt;br /&gt;
echo &amp;quot;&amp;quot; &amp;gt; $PROGFILE&lt;br /&gt;
cat ${PBS_NODEFILE} &amp;gt; ~/pbs_nodefile&lt;br /&gt;
&lt;br /&gt;
#&lt;br /&gt;
# Run runindex.sh at worker nodes&lt;br /&gt;
#&lt;br /&gt;
count=$FIRSTINDEXNO&lt;br /&gt;
for node in `cat ${PBS_NODEFILE}`; do&lt;br /&gt;
	if [ $count -eq $FIRSTINDEXNO ]; then # this is master node&lt;br /&gt;
		for (( i=$count; i&amp;lt;=83; i=i+$totalnodes )); do&lt;br /&gt;
			echo &amp;quot;$HOST($DATE): $node is being deployed&amp;quot; &amp;gt;&amp;gt; $PROGFILE&lt;br /&gt;
			~/webarc/runindex.sh $i &amp;amp;&lt;br /&gt;
		done&lt;br /&gt;
	else&lt;br /&gt;
		for (( i=$count; i&amp;lt;=83; i=i+$totalnodes )); do&lt;br /&gt;
			echo &amp;quot;$HOST($DATE): $node is being deployed&amp;quot; &amp;gt;&amp;gt; $PROGFILE&lt;br /&gt;
			ssh $node &amp;quot;~/webarc/runindex.sh $i&amp;quot; &amp;amp;&lt;br /&gt;
		done&lt;br /&gt;
	fi&lt;br /&gt;
	let count=count+1&lt;br /&gt;
done&lt;br /&gt;
&lt;br /&gt;
#&lt;br /&gt;
# Monitor deploy status&lt;br /&gt;
#&lt;br /&gt;
PNAME=&amp;quot;runindex.sh&amp;quot;&lt;br /&gt;
for node in `cat ${PBS_NODEFILE}`; do&lt;br /&gt;
	while ssh $node /sbin/pidof -x $PNAME &amp;gt; /dev/null; do&lt;br /&gt;
		DATE=`date +&amp;quot;%m/%d/%y %H:%M:%S&amp;quot;`&lt;br /&gt;
  		echo &amp;quot;$HOST($DATE): runindex.sh still running at $node&amp;quot; &amp;gt;&amp;gt; $PROGFILE&lt;br /&gt;
		sleep 300 #sleep 5 minutes&lt;br /&gt;
	done&lt;br /&gt;
	DATE=`date +&amp;quot;%m/%d/%y %H:%M:%S&amp;quot;`&lt;br /&gt;
	echo &amp;quot;$HOST($DATE): no existing runindex.sh&amp;quot; &amp;gt;&amp;gt; $PROGFILE&lt;br /&gt;
done&lt;br /&gt;
&lt;br /&gt;
#&lt;br /&gt;
# Stage out all data&lt;br /&gt;
#&lt;br /&gt;
for node in `cat ${PBS_NODEFILE}`; do&lt;br /&gt;
	DATE=`date +&amp;quot;%m/%d/%y %H:%M:%S&amp;quot;`&lt;br /&gt;
	echo &amp;quot;$HOST($DATE): stage out data from $node&amp;quot; &amp;gt;&amp;gt; $PROGFILE&lt;br /&gt;
	ssh $node &amp;quot;\rm -rf /scratch1/*&amp;quot; &amp;amp;&lt;br /&gt;
done&lt;br /&gt;
&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
runindex.sh&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
#!/usr/bin/bash&lt;br /&gt;
&lt;br /&gt;
d=`printf &amp;quot;%03d&amp;quot; $1`&lt;br /&gt;
HOST=`hostname -s`&lt;br /&gt;
PROGFILE=/vnodehomes/toaster/webarc/prog/index-$d.$HOST&lt;br /&gt;
echo &amp;quot;&amp;quot; &amp;gt; $PROGFILE&lt;br /&gt;
&lt;br /&gt;
cd /scratch1&lt;br /&gt;
&lt;br /&gt;
## STAGE IN LIBRARIES ##&lt;br /&gt;
if [ ! -d /scratch1/lib ]; then &lt;br /&gt;
	cp -r /fs/webarc3/data/wikipedia/lib /scratch1/&lt;br /&gt;
fi&lt;br /&gt;
&lt;br /&gt;
## STAGE IN BDB FILE ##&lt;br /&gt;
DATE=`date +&amp;quot;%m/%d/%y %H:%M:%S&amp;quot;`&lt;br /&gt;
echo &amp;quot;$HOST($DATE): Staging in BDB File ($d)&amp;quot; &amp;gt;&amp;gt; $PROGFILE&lt;br /&gt;
if [ ! -d /scratch1/month-$d-co ]; then&lt;br /&gt;
	cp -r /fs/webarc3/data/wikipedia/bdb-monthly/month-$d-co /scratch1/&lt;br /&gt;
	chmod 755 /scratch1/month-$d-co&lt;br /&gt;
fi&lt;br /&gt;
#ln -s /fs/webarc3/data/wikipedia/bdb-monthly/month-$d-co /scratch1/&lt;br /&gt;
&lt;br /&gt;
## STAGE IN XML FILE ##&lt;br /&gt;
DATE=`date +&amp;quot;%m/%d/%y %H:%M:%S&amp;quot;`&lt;br /&gt;
echo &amp;quot;$HOST($DATE): Staging in XML File ($d)&amp;quot; &amp;gt;&amp;gt; $PROGFILE&lt;br /&gt;
if [ ! -d /scratch1/preprocessed-monthly ]; then&lt;br /&gt;
	ln -s /fs/webarc3/data/wikipedia/preprocessed-monthly /scratch1/&lt;br /&gt;
	#cp -vr /fs/webarc3/data/wikipedia/preprocessed-monthly /scratch1/&lt;br /&gt;
	#chmod 644 /scratch1/preprocessed-monthly/trec-month-*.xml&lt;br /&gt;
	DATE=`date +&amp;quot;%m/%d/%y %H:%M:%S&amp;quot;`&lt;br /&gt;
	echo &amp;quot;$HOST($DATE): XML Copy Finished&amp;quot; &amp;gt;&amp;gt; $PROGFILE&lt;br /&gt;
	echo &amp;quot;$HOST($DATE): XML Copy Finished&amp;quot; &amp;gt; /scratch1/xml_copy_done&lt;br /&gt;
fi&lt;br /&gt;
&lt;br /&gt;
## WAIT UNTIL XML COPY IS DONE ##&lt;br /&gt;
while [ ! -f /scratch1/xml_copy_done ]; do&lt;br /&gt;
	sleep 60;&lt;br /&gt;
done&lt;br /&gt;
&lt;br /&gt;
## MAKE LOCAL PARAMETER FILES ##&lt;br /&gt;
DATE=`date +&amp;quot;%m/%d/%y %H:%M:%S&amp;quot;`&lt;br /&gt;
echo &amp;quot;$HOST($DATE): Making parameter files ($d)&amp;quot; &amp;gt;&amp;gt; $PROGFILE&lt;br /&gt;
outfile=/scratch1/month-$d.params&lt;br /&gt;
echo &amp;quot;&amp;lt;parameters&amp;gt;&amp;quot; &amp;gt; $outfile&lt;br /&gt;
echo &amp;quot;  &amp;lt;index&amp;gt;/scratch1/month-$d&amp;lt;/index&amp;gt;&amp;quot; &amp;gt;&amp;gt; $outfile&lt;br /&gt;
echo &amp;quot;  &amp;lt;indexType&amp;gt;indri&amp;lt;/indexType&amp;gt;&amp;quot; &amp;gt;&amp;gt; $outfile&lt;br /&gt;
echo &amp;quot;  &amp;lt;corpus&amp;gt;&amp;quot; &amp;gt;&amp;gt; $outfile&lt;br /&gt;
echo &amp;quot;    &amp;lt;path&amp;gt;/scratch1/preprocessed-monthly/trec-month-$d.xml&amp;lt;/path&amp;gt;&amp;quot; &amp;gt;&amp;gt; $outfile&lt;br /&gt;
echo &amp;quot;    &amp;lt;class&amp;gt;trectext&amp;lt;/class&amp;gt;&amp;quot; &amp;gt;&amp;gt; $outfile&lt;br /&gt;
echo &amp;quot;  &amp;lt;/corpus&amp;gt;&amp;quot; &amp;gt;&amp;gt; $outfile&lt;br /&gt;
echo &amp;quot;&amp;lt;/parameters&amp;gt;&amp;quot; &amp;gt;&amp;gt; $outfile&lt;br /&gt;
&lt;br /&gt;
outfile=/scratch1/month-$d-co.params&lt;br /&gt;
echo &amp;quot;&amp;lt;parameters&amp;gt;&amp;quot; &amp;gt; $outfile&lt;br /&gt;
echo &amp;quot;  &amp;lt;index&amp;gt;/scratch1/month-$d&amp;lt;/index&amp;gt;&amp;quot; &amp;gt;&amp;gt; $outfile&lt;br /&gt;
echo &amp;quot;  &amp;lt;indexType&amp;gt;indri&amp;lt;/indexType&amp;gt;&amp;quot; &amp;gt;&amp;gt; $outfile&lt;br /&gt;
echo &amp;quot;  &amp;lt;corpus&amp;gt;&amp;quot; &amp;gt;&amp;gt; $outfile&lt;br /&gt;
echo &amp;quot;    &amp;lt;path&amp;gt;/scratch1/month-$d-co&amp;lt;/path&amp;gt;&amp;quot; &amp;gt;&amp;gt; $outfile&lt;br /&gt;
echo &amp;quot;    &amp;lt;class&amp;gt;trectext_from_bdb&amp;lt;/class&amp;gt;&amp;quot; &amp;gt;&amp;gt; $outfile&lt;br /&gt;
echo &amp;quot;  &amp;lt;/corpus&amp;gt;&amp;quot; &amp;gt;&amp;gt; $outfile&lt;br /&gt;
echo &amp;quot;&amp;lt;/parameters&amp;gt;&amp;quot; &amp;gt;&amp;gt; $outfile&lt;br /&gt;
&lt;br /&gt;
## START INDEXING ##&lt;br /&gt;
DATE=`date +&amp;quot;%m/%d/%y %H:%M:%S&amp;quot;`&lt;br /&gt;
echo &amp;quot;$HOST($DATE): Indexing Carry-Overs ($d)&amp;quot; &amp;gt;&amp;gt; $PROGFILE&lt;br /&gt;
~/webarc/lemur-4.10/bin/IndriBuildIndex /scratch1/month-$d-co.params 2&amp;gt;&amp;amp;1 &amp;gt;&amp;gt; $PROGFILE &lt;br /&gt;
DATE=`date +&amp;quot;%m/%d/%y %H:%M:%S&amp;quot;`&lt;br /&gt;
echo &amp;quot;$HOST($DATE): Indexing Fresh Docs ($d)&amp;quot; &amp;gt;&amp;gt; $PROGFILE&lt;br /&gt;
~/webarc/lemur-4.10/bin/IndriBuildIndex /scratch1/month-$d.params 2&amp;gt;&amp;amp;1 &amp;gt;&amp;gt; $PROGFILE&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
## COPY INDEX ##&lt;br /&gt;
DATE=`date +&amp;quot;%m/%d/%y %H:%M:%S&amp;quot;`&lt;br /&gt;
echo &amp;quot;$HOST($DATE): Copying Index ($d)&amp;quot; &amp;gt;&amp;gt; $PROGFILE&lt;br /&gt;
scp -i ~/webarc/id_rsa -r /scratch1/month-$d scsong@naraapp03:/fs/webarc3/data/wikipedia/lemur_index/monthly/  #scp due to privilege issue&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
## STAGE OUT LOCAL DATA ##&lt;br /&gt;
#DATE=`date +&amp;quot;%m/%d/%y %H:%M:%S&amp;quot;`&lt;br /&gt;
#echo &amp;quot;$HOST($DATE): Staging Out Local Data ($d)&amp;quot; &amp;gt;&amp;gt; $PROGFILE&lt;br /&gt;
#rm -f month-$d-co.params&lt;br /&gt;
#rm -f month-$d.params&lt;br /&gt;
#\rm -rf ./month-$d&lt;br /&gt;
#\rm -rf ./month-$d-co&lt;br /&gt;
&lt;br /&gt;
DATE=`date +&amp;quot;%m/%d/%y %H:%M:%S&amp;quot;`&lt;br /&gt;
echo &amp;quot;$HOST($DATE): Index FINISHED!! ($d)&amp;quot; &amp;gt;&amp;gt; $PROGFILE&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;/div&gt;</summary>
		<author><name>Scsong</name></author>
	</entry>
</feed>