#!/usr/bin/ksh

#------------------------------------------------------------
#Centralized cpu finder architecture
#This file does some error checking
#Then notifies master cpufinder that a job is pending
#Finds available cpus, checks on errors, checks for licenses
#syntax: cpu_finder lastscript 
#------------------------------------------------------------

#------------------------------------------------------------
# Defaults and environment
#
# Sources the site settings file, then derives the values used
# by the rest of the script:
#   username, processid, localhost  - identify this invocation
#   lastscript                      - first command-line argument
#   lastline, getline, error_recovery
#                                   - helper paths under $script_path
#   batchdir, scriptname            - outputs of extract_pname and
#                                     extract_fname for $lastscript
#   batchlog, batchque              - files inside $batchdir
#
# polltime, script_path and DEBUG are not assigned in this file;
# presumably they come from global.site.ksh -- confirm there.
#------------------------------------------------------------
. "$WFU_PATH/global.site.ksh"

username=$(whoami)
processid=$$
# NOTE(review): sleeptime is not read again in this file.
sleeptime=$(( polltime + 20 ))
localhost=$(hostname)
lastscript=$1
lastline=$script_path/lastline
getline=$script_path/getline
error_recovery=$script_path/error_recovery

#---------------------------------------------------
# split the script argument into directory and name,
# then build the batch_log and batch_que file names
#---------------------------------------------------
batchdir=$(extract_pname "$lastscript")
scriptname=$(extract_fname "$lastscript")
batchlog=$batchdir/batch_log
batchque=$batchdir/batch_que

# DEBUG defaults to 0 so that an unset variable does not turn the
# test into a malformed expression.
if [ "${DEBUG:-0}" -eq 1 ]; then
	echo "entered cpufinder on $localhost with $lastscript" >> "$batchlog"
fi


#------------------------------------------------------------------
# Wait for an earlier cpu_finder in this batch directory.
# A directory entry whose name contains "cpufinder_alive" means one
# is still active.  While such an entry exists: sleep $polltime
# seconds, then run check_errors with the batch directory and the
# elapsed $SECONDS.  If check_errors prints 1, log the fact and exit.
#------------------------------------------------------------------
cpu_alive_loop=1
while [ $cpu_alive_loop -eq 1 ]; do
	cpufinder_alive=$(ls $batchdir | grep cpufinder_alive)
	len=${#cpufinder_alive}
	if [ $len -eq 0 ]; then
		# nothing matched: no other cpu_finder, stop waiting
		cpu_alive_loop=0
		break
	fi

	sleep $polltime
	runtime=$SECONDS
	error_flag=$(check_errors $batchdir $runtime)
	if [ $error_flag -eq 1 ]; then
		cpu_alive_loop=0
		echo error caught in initial cpufinder loop >> $batchlog
		#k=`rm $batchdir/running*`
		exit
	fi
done

#------------------------------------------------------------------
# Remove any leftover running* files from the batch directory, then
# create the cpufinder_alive file.  Its content is an rsh command
# line naming this host and this shell's pid with "kill -9".
#
# rm -f: when no running* file exists the glob stays unexpanded and
# a plain rm would report an error on every clean start; -f also
# keeps rm from prompting.
#------------------------------------------------------------------
rm -f "$batchdir"/running*
cpufinderfile=$batchdir/cpufinder_alive
echo rsh $localhost kill -9 $$ > "$cpufinderfile"


#------------------------------------------------------------------
# Timestamp used in the log entries.
# current_time is the output of $script_path/systime and cdate the
# output of date.  Both are first taken locally; when rsh_flag is
# greater than 0 they are replaced by the values obtained through
# rsh on $timekeeper.
# db is the " | " separator placed between log fields.
#------------------------------------------------------------------
current_time=$($script_path/systime)
cdate=$(date)
if [ $rsh_flag -gt 0 ]; then
	cdate=$(rsh $timekeeper date)
	current_time=$(rsh $timekeeper $script_path/systime)
fi
db=' | '


#------------------------------------------------------------------
# Read line 1 of the batch queue and check the previous script.
#
# If the queue's first entry is the very script named on the command
# line, that script is reported as TERMINATED / never completed:
#   - spmbatchlicense* files in the batch directory are removed
#   - the failure is written to the batch log
#   - the cpufinder_alive file is removed
#   - $error_recovery is run; if it prints 1, quick_start is run
#     from the batch directory and the script exits
#   - otherwise mail_script is called with ERROR and the script exits
#------------------------------------------------------------------
next_script=$("$getline" "$batchque" 1)

len=${#next_script}
if [ $len -gt 0 ]; then
	job_done=0
	# Both operands are quoted: with an empty or multi-word value the
	# unquoted form is a malformed test rather than a comparison.
	if [ "$lastscript" = "$next_script" ]; then
		message1=$(echo $lastscript$db TERMINATED on $db$db$cdate$db$db$current_time)
		# -f: stay quiet when no license file is present
		rm -f "$batchdir"/spmbatchlicense*
		message2='ERROR THIS SCRIPT NEVER COMPLETED'
		echo $batchdir
		echo $message1 >> "$batchlog"
		echo ------------------------------------------------ >> "$batchlog"
		echo $current_time >> "$batchlog"
		echo $message2 >> "$batchlog"
		echo ------------------------------------------------ >> "$batchlog"
		#echo mail_script $batchdir $rsh_flag ERROR
		rm -f "$cpufinderfile"
		error_recovery_flag=$("$error_recovery" $batchdir)
		# an empty answer from the helper counts as "no recovery"
		if [ "${error_recovery_flag:-0}" -eq 1 ]; then
			echo 'Error Recovery In Effect' >> "$batchlog"
			cd $batchdir
			startup=$($script_path/quick_start $batchdir error_recovery=$error_recovery_flag)
			exit
		fi
		encoded_mailfile=$(uumail $batchdir)
		mail_script $batchdir $rsh_flag ERROR $encoded_mailfile
		exit
	fi
fi

#------------------------------------------------------------------
# Record in the batch log that the last script completed.
#------------------------------------------------------------------
message1=$(echo $lastscript$db completed on $db$db$cdate$db$db$current_time)
echo $message1 >> $batchlog


#------------------------------------------------------------------
# An empty queue entry means every script has been run.
# In that case: remove the spmbatchlicense* files, write the closing
# lines to the batch log, remove the cpufinder_alive file, call
# mail_script and exit.  The status word passed to mail_script is
# COMPLETED, or COMPLETED_WITH_T3_ERROR_RECOVERY when the batch
# directory holds any error_recovery* file.
#------------------------------------------------------------------
job_done=0
len=${#next_script}
if [ $len -eq 0 ]; then
	job_done=1
	rm $batchdir/spmbatchlicense*
	message1=$db$db$db$cdate$db$db$current_time
	message2='This script is done   '
	echo $message1 >> $batchlog
	echo ---------------------------- >> $batchlog
	echo $message2 >> $batchlog
	k=$(rm $cpufinderfile)
	encoded_mailfile=$(uumail $batchdir)
	er_flag=$(ls $batchdir/error_recovery* 2>/dev/null)
	er_len=${#er_flag}
	if [ $er_len -gt 0 ]; then
		completion_string=COMPLETED_WITH_T3_ERROR_RECOVERY
	else
		completion_string=COMPLETED
	fi
	mail_script $batchdir $rsh_flag $completion_string $encoded_mailfile
	exit
fi


#------------------------------------------------------------------
# Decide which license the next script needs by searching its text:
#   contains "idlx"     -> idl   (wins if both strings are present)
#   contains "matlabx"  -> matlab
#   neither             -> unix
# matlabflag and idlflag end up holding the length of the matching
# grep output.
#------------------------------------------------------------------
licensetype=unix

matlabflag=$(grep matlabx $next_script)
idlflag=$(grep idlx $next_script)
matlabflag=${#matlabflag}
idlflag=${#idlflag}

[ $matlabflag -gt 0 ] && licensetype=matlab
[ $idlflag -gt 0 ] && licensetype=idl
#------------------------------------------------------------------
# Move into the batch directory and remove old cpu_found* files.
# A script that needs only a unix license is started right away on
# this host: log the dispatch, run launch_script, remove the
# cpufinder_alive file and exit.
#------------------------------------------------------------------
cd $batchdir
k=$(rm cpu_found*)
case $licensetype in
unix)
	next_host=$(hostname)
	message1=$(echo $next_script$db sent to $db$next_host$db$cdate$db$db$current_time)
	echo $message1 >> $batchlog
	launch_script $next_host $next_script $rsh_flag
	k=$(rm $cpufinderfile)
	exit
	;;
esac

#----------------------------------------------------------------------------
# Run start_cpufinder_server, then announce the pending job.
# The job file is named job<pid>_<script name>.  The same one-line
# record (job file, script, license type, host, pid, user) is written
# twice: into $refdir/masterque and into the batch directory.
# Per the original notes, the masterque copy notifies the central
# cpufinder and the batch directory copy marks the job as pending locally.
#----------------------------------------------------------------------------
$script_path/start_cpufinder_server

masterque=$refdir/masterque
next_scriptname=$(extract_fname $next_script)
jobfile=$(echo job$$_$next_scriptname)

job_record="$jobfile $next_script $licensetype $localhost $processid $username"
echo $job_record > $masterque/$jobfile
echo $job_record > $batchdir/$jobfile

#----------------------------------------------------------------------------
# Run wait_for on the cpu_found file in the batch directory, then
# remove the local job file and ask check_errors for an error flag.
#
# Afterwards confirm that the cpu_found file is really there: if ls
# prints nothing for it, error_flag is forced to 1.
#----------------------------------------------------------------------------
cpu_found_file=$batchdir/cpu_found
$script_path/wait_for $cpu_found_file $batchdir

k=$(rm $batchdir/$jobfile)
runtime=$SECONDS
error_flag=$(check_errors $batchdir $runtime)

runfile=$(ls $cpu_found_file)
len=${#runfile}
[ $len -eq 0 ] && error_flag=1

# A positive error_flag ends this run: the cpufinder_alive file is
# removed and $error_recovery is run.  If it prints 1, quick_start is
# run from the batch directory; otherwise mail_script is called with
# ERROR.  Either way the script exits here.
if [ $error_flag -gt 0 ]; then
	cpu_loop=0
	cpu_not_running=0
	k=$(rm $cpufinderfile)
	error_recovery_flag=$($error_recovery $batchdir)
	if [ $error_recovery_flag -eq 1 ]; then
		echo 'Error Recovery In Effect' >> $batchlog
		cd $batchdir
		startup=$($script_path/quick_start $batchdir error_recovery=$error_recovery_flag)
	else
		encoded_mailfile=$(uumail $batchdir)
		mail_script $batchdir $rsh_flag ERROR $encoded_mailfile
	fi
	exit
fi


#------------------------------------------------------
#check if process is already running in this directory
#-----------------------------------------------------
#runfile=`ls $batchdir | grep running`
#len=${#runfile}
#if test $len -gt 0
#	then cpu_loop=0
#	cpu_not_running=0
#	k=`rm $cpufinderfile`
#	exit 
#fi

# Take a fresh timestamp (through rsh on $timekeeper when rsh_flag is
# greater than 0), read the assigned host from the cpu_found file and
# log the dispatch.
cdate=$(date)
current_time=$($script_path/systime)
if [ $rsh_flag -gt 0 ]; then
	cdate=$(rsh $timekeeper date)
	current_time=$(rsh $timekeeper $script_path/systime)
fi

next_host=$(cat $cpu_found_file)
message1=$(echo $next_script$db sent to $db$next_host$db$cdate$db$db$current_time)
echo $message1 >> $batchlog
#echo launch_script $next_host $next_script $rsh_flag >> $batchlog

# Clear the cpu_found* files, start the script in the background and
# remove the cpufinder_alive file before exiting.
# NOTE(review): the assignment itself is backgrounded, so "launch" is
# never set in this shell and launch_script's stdout is discarded.
cd $batchdir
k=$(rm cpu_found*)
launch=$(launch_script $next_host $next_script $rsh_flag) &
rm $cpufinderfile
exit


