#!/bin/bash

# This is the main file to build and test a classifier, which calls programs
# for feature extraction, feature selection, classifier training, and/or classifier testing.

# last modified : Aug 26, 2009 by Stathis Kanterakis
# yong.fan@ieee.org, kanterae@uphs.upenn.edu

# Subversion keyword strings and program name used by version() and usage().
# The keyword strings are single-quoted on purpose: inside double quotes the
# shell treats "$Id" as a (normally unset) variable and silently drops it, so
# the reported file version would start with ": Compare.sh ...".
SVNFILEVER='$Id: Compare.sh 35 2010-09-22 14:41:15Z batmangn@UPHS.PENNHEALTH.PRV $'
SVN_FILE_VERSION='$Id: Compare.sh 35 2010-09-22 14:41:15Z batmangn@UPHS.PENNHEALTH.PRV $'
SVN_REV=""
EXEC_NAME="Compare"

# Locate the package's libexec directory relative to the directory this
# script was started from, verify that every helper program the pipeline
# calls is present there, and put that directory first on PATH.
# All expansions are quoted so that an install path containing whitespace
# does not break the checks.
basedir=$(dirname "$0")
libexec="${basedir}/../libexec"
if [ ! -d "$libexec" ]; then
  echo "Could not find libexec directory at \"$libexec\"" 1>&2
  exit 1
fi

# check for required executables
for exe in \
  COMPARE_extract \
  COMPARE_check_input \
  COMPARE_average_score \
  COMPARE_build \
  COMPARE_build_loo \
  COMPARE_build_check \
  COMPARE_SVM_discriminate \
  COMPARE_map_svm_diff_to_region \
  COMPARE_float_sum \
  COMPARE_float_sum_m \
  COMPARE_model \
  COMPARE_score_mean \
  COMPARE_score_sum
do
  if [ ! -e "${libexec}/${exe}" ]; then
    echo "Could not find required executable \"$exe\" in \"$libexec\"" 1>&2
    exit 1
  fi
done

# We're OK to go on: helper programs are called by bare name from here on.
export PATH="${libexec}:${PATH}"


#######################################
# Print program and revision information to stdout.
# Globals:   EXEC_NAME, RELEASE_ID, SVN_REV, SVNFILEVER (read)
# Arguments: none
# Outputs:   five lines of version text on stdout
# NOTE(review): RELEASE_ID is not assigned anywhere in the visible script;
# presumably it comes from the environment or packaging - otherwise the
# "Release" line is printed with an empty value.
#######################################
version() {
  printf '%s\n' \
    " $EXEC_NAME Version information" \
    "  Release          : $RELEASE_ID" \
    "  Svn Revision     : $SVN_REV" \
    "  Svn File Version : $SVNFILEVER" \
    "NOTE: This version depends on makeNiftiHeader.py revision 220 and later"
}

#######################################
# Print the command-line help text to stdout.
# Globals:   EXEC_NAME (read) - program name shown in the usage lines
# Arguments: none
# Outputs:   the help text (35 lines, starting and ending with a blank line)
#
# The text is emitted from a single here-document so what the user sees is
# exactly what is written below. Two typos in the text were corrected
# ("defalut" -> "default", "all image are" -> "all images are").
#######################################
usage() {
  cat <<EOF

  This program trains a classifier via leave-one-out cross-validation and outputs the classification rates

  Usage: $EXEC_NAME (data.lst) (result_file) [options]

  Required parameters:
    data.lst      : the input file containing the location of the data
    result_file   : text file for the output

  Optional parameters:
    [-k    <int>      ]  std value for Gaussian kernel in SVM (range:1~10000. This value is multiplied by 100! default:1)
    [-j    <int>      ]  searching range for std value (should be greater than above value, optional)
    [-c    <int>      ]  trade-off value in SVM (range:1~10000, default: 10)
    [-t    file_name  ]  subject list to be classified by the classifier trained (default: no testing)
    [-T    <float>    ]  randomly leave this % out from data.lst for testing (overrides -t, default: no testing)
    [-s    <float>    ]  Gaussian kernel size for smoothing score map (default: 3.0, range:1~10)
    [-n    <int>      ]  searching space of feature number (range:1~1500, default: 150)
    [-m    model_name ]  name of a file to store the trained classifier (default: no output)
    [-M    <int>      ]  starting point of a range of features for final classification (default: 1)
    [-N    <int>      ]  ending point of a range of features for final classification (default: 150)
    [-S    spatial_map]  prefix of name of group difference spatial maps (in float format, one for each feature) (default: no output)

    [-v]   version information
    [-h]   this message
    [-u]   this message

    Note:  COMPARE extracts features directly from .img files and does not read corresponding header files; please make sure that all images are 'signed-short' (2-byte) images and all have the same orientation and registered to the same template. In addition, class labels are either '1' or '-1'.

  More information:
    https://www.rad.upenn.edu/sbia/software/
    sbia-software@uphs.upenn.edu

  Suggested use: $EXEC_NAME data.lst data.out -j2 -T20 -mdata.mdl -Sdata_map
    where "data" is the name of your dataset

EOF
}

#######################################
# Abort the script when a previous command failed.
# Arguments: $1 - exit status to inspect (a missing/empty value counts as 0)
#            $2 - message to print when the status is not "0"
# Outputs:   the message on stderr, only on failure
# Returns:   returns normally when the status is "0"; otherwise exits the
#            script with that status
#######################################
checkandexit() {
  local status="${1:-0}"
  local message="${2:-}"

  if [[ "$status" != "0" ]]; then
    # There was an error, bail out!
    # printf instead of echo so a message such as "-n" is printed verbatim.
    printf '%s\n' "$message" 1>&2
    exit "$status"
  fi
}


# With fewer than the two required positional arguments the only useful
# invocations are the informational flags: -v prints the version, -h/-u
# print the help text (all exit 0). Anything else - including no arguments
# at all - prints the help text and exits 1.
if (( $# < 2 )); then
  while getopts vhu early_opt; do
    case "$early_opt" in
      v)
        version
        exit 0
        ;;
      h | u)
        usage
        exit 0
        ;;
      *)
        usage
        exit 1
        ;;
    esac
  done
  usage
  exit 1
fi

# Required positional arguments: training list and result file.
train_name=$1
result=$2

# Defaults for every option. "0" doubles as "not requested" for the
# file-name style options (test_name, model_name, spatial_map), which are
# later compared against the string "0".
svm_kernel=1      # -k
svm_kernel1=1     # -j
svm_c=10          # -c
test_name=0       # -t
smooth_size=3.0   # -s
num_feature=150   # -n
model_name=0      # -m
num_f_start=0     # -M
num_f_end=0       # -N
spatial_map=0     # -S
leave_perc_out=0  # -T
skiptraining=0    # -p

# Drop the two positionals so getopts only sees the options that follow.
shift 2

# The option string now contains "h" and "u": previously those two case
# arms were unreachable, so "-h" after the positional arguments was reported
# as an illegal option and exited with status 1.
while getopts k:j:c:t:s:n:m:M:N:S:T:p:vhu opt; do
  case "$opt" in
    k) svm_kernel="$OPTARG" ;;
    j) svm_kernel1="$OPTARG" ;;
    c) svm_c="$OPTARG" ;;
    t) test_name="$OPTARG" ;;
    s) smooth_size="$OPTARG" ;;
    n) num_feature="$OPTARG" ;;
    m) model_name="$OPTARG" ;;
    M) num_f_start="$OPTARG" ;;
    N) num_f_end="$OPTARG" ;;
    S) spatial_map="$OPTARG" ;;
    T) leave_perc_out="$OPTARG" ;;
    p) skiptraining="$OPTARG" ;;
    v)
      version
      exit 0
      ;;
    h | u)
      usage
      exit 0
      ;;
    *)
      usage
      exit 1
      ;;
  esac
done

# Validate the training list (and the test list, or "0" when none was
# given) with the COMPARE_check_input helper.
echo " checking input ..."
COMPARE_check_input "$train_name" "$test_name"
checkandexit $? "Error checking input!"

# Read the six training parameters, one per line, from train_para.txt:
# data root directory, number of subjects, number of features and the
# x/y/z dimensions.
# NOTE(review): the file is presumably produced by COMPARE_check_input;
# this script never writes it - confirm.
train_para="train_para.txt"
if [ -e "$train_para" ]; then
  {
    read -r train_root_dir
    read -r train_num_subj
    read -r train_num_feat
    read -r xdim
    read -r ydim
    read -r zdim
  } < "$train_para"
else
  # Diagnostics go to stderr, like the libexec checks at the top of the script.
  echo "Error: could not open input for training and testing!" 1>&2
  exit 1
fi

# Derive the path of a template header: take the first space/tab-separated
# field of line 5 of the training list, prefix it with the training root
# directory and replace its extension with ".hdr". The list name is quoted
# so a path containing whitespace is not word-split (and an empty name no
# longer makes awk read from stdin).
first_image=$(awk 'NR==5' "$train_name" | cut -d' ' -f1 | cut -f1)
ahdr="${train_root_dir}/${first_image}"
ahdr="${ahdr%.*}.hdr"

# ---------------------------------------------------------------------------
# Training phase - runs unless -p was given a non-zero value.
#   1. optionally split the subject list into a training and a test list (-T)
#   2. extract features for every training subject
#   3. build classifiers for every kernel size from -k to -j and keep the
#      files of the kernel size that COMPARE_build_check selects
#   4. optionally compute group difference spatial maps (-S)
# Helper programs write their output to a.log; its last line is appended to
# the error message when a helper fails.
# ---------------------------------------------------------------------------
if [ "$skiptraining" -eq 0 ]; then

  # -T is documented as a float but every test and computation below is
  # integer-only, so drop any fractional part ("20.5" -> "20", ".5" -> "0").
  leave_perc_out=${leave_perc_out%%.*}
  leave_perc_out=${leave_perc_out:-0}

  if [ "$leave_perc_out" -gt 0 ]; then
    MAX=$train_num_subj
    NUM_LEFT=$(( MAX * leave_perc_out / 100 ))  # subjects held out for testing
    NUM=$(( MAX - NUM_LEFT ))                   # subjects kept for training
    # The list has three header lines (counts, dimensions, root directory -
    # exactly what is written to the new lists below), so subject lines are
    # 4 .. MAX+3.
    MIN=4

    # Both sets must be non-empty, otherwise the split is meaningless.
    if [ "$NUM_LEFT" -lt 1 ] || [ "$NUM" -lt 1 ]; then
      echo "Error: -T $leave_perc_out must leave at least one subject for testing and one for training!" 1>&2
      exit 1
    fi

    # Draw NUM distinct line numbers from the MAX subject lines. The old
    # draw covered only MAX-1 lines starting at line 5: the first subject
    # could never be selected for training, and the loop never terminated
    # when NUM equalled MAX.
    RAND=$(awk -v NUM="$NUM" -v MAX="$MAX" -v MIN="$MIN" '
      BEGIN {
        srand()
        for (j = 0; j < NUM; j++) {
          # loop to find a not-yet-seen selection
          do {
            line = MIN + int(rand() * MAX)
          } while (line in pick)
          pick[line] = line
        }
        for (j in pick)
          printf("%s ", pick[j])
      }')

    to_train="${train_name%.*}_totrain.lst"
    to_test="${train_name%.*}_totest.lst"
    sorted_all="${train_name%.*}_sorted.lst"
    /bin/rm -f "$to_train" "$to_test"

    # Collect the selected subject lines, then rebuild the training list as
    # header + sorted selection. $RAND is deliberately unquoted: it is a
    # space-separated list of line numbers.
    for i in $RAND; do
      awk -v n="$i" 'NR == n' "$train_name" >> "$to_train"
    done
    sort -o "${to_train}.tmp" "$to_train"
    printf '%s\t%s\n' "$NUM" "$train_num_feat" > "$to_train"
    echo "$xdim $ydim $zdim" >> "$to_train"
    echo "$train_root_dir" >> "$to_train"
    cat "${to_train}.tmp" >> "$to_train"

    # All subject lines, sorted the same way, to diff against the selection.
    awk 'NR>3' "$train_name" | sort > "$sorted_all"

    # The test list gets the same kind of header; its subjects are the lines
    # diff reports as present only in the full list ("< " prefix).
    printf '%s\t%s\n' "$NUM_LEFT" "$train_num_feat" > "$to_test"
    echo "$xdim $ydim $zdim" >> "$to_test"
    echo "$train_root_dir" >> "$to_test"
    diff "$sorted_all" "$to_train" | grep '^<' | cut -d' ' -f2- >> "$to_test"

    /bin/rm -f "${to_train}.tmp" "$sorted_all"
    num_subject=$NUM
    train_num_subj=$NUM
    train_name=$to_train
    test_name=$to_test
    COMPARE_check_input "$train_name" "$test_name"
    checkandexit $? "Error checking input!"
  else
    num_subject=$train_num_subj
  fi

  data_root_dir=$train_root_dir

  echo " extracting features ..."

  # A subject whose r_roi_train_<i>.txt already exists is not re-extracted.
  for (( i = 0; i < train_num_subj; i++ )); do
    if [ ! -f "r_roi_train_${i}.txt" ]; then
      COMPARE_extract "$train_name" "$i" -sigma "$smooth_size" > a.log
      checkandexit $? "Error extracting features: $(tail -n 1 a.log)!"
    fi
  done

  # determining the best features
  base_dir=$(pwd)
  train_file_base="r_roi_train"
  test_file_base="r_roi_test"

  # Size of the -M .. -N range; recorded on disk so it can go into the model
  # archive.
  f_num_feature=$(( num_f_end - num_f_start + 1 ))
  echo "$f_num_feature" > COMPARE_f_num_feature.bin

  echo " building classifier ..."
  # -j is the upper end of the kernel search range; never below -k.
  if [ "$svm_kernel1" -lt "$svm_kernel" ]; then
    svm_kernel1=$svm_kernel
  fi

  start_svm_k=$(( svm_kernel * 100 ))
  old_svm_kernel=$svm_kernel

  # One build per kernel size; the helpers receive the size multiplied by 100.
  while [ "$svm_kernel" -le "$svm_kernel1" ]; do
    svm_k=$(( svm_kernel * 100 ))

    # Fewer than 40 subjects use the _loo builder, otherwise the regular one.
    if [ "$num_subject" -lt 40 ]; then
      build_cmd=COMPARE_build_loo
    else
      build_cmd=COMPARE_build
    fi
    "$build_cmd" "$base_dir" "$num_subject" "$train_file_base" "$test_file_base" \
      -num_feature "$num_feature" -c "$svm_c" -std "$svm_k" \
      -start_point "$num_f_start" -end_point "$num_f_end" > a.log
    checkandexit $? "Error building classifier: $(tail -n 1 a.log)!"

    # Prefix this kernel's outputs with the kernel size so the next
    # iteration does not overwrite them.
    for (( i = 0; i < train_num_subj; i++ )); do
      /bin/mv "order_${i}.bin" "${svm_kernel}_order_${i}.bin"
      for (( j = 0; j < f_num_feature; j++ )); do
        /bin/mv "svm_model_${i}_${j}.bin" "${svm_kernel}_svm_model_${i}_${j}.bin"
      done
    done

    svm_kernel=$(( svm_kernel + 1 ))
  done

  end_svm_k=$svm_k

  COMPARE_build_check classification_rate "$start_svm_k" "$end_svm_k"
  checkandexit $? "Error checking classification rate!"

  # Kernel size chosen by COMPARE_build_check (same units as -k).
  read -r optimal_svm_kernel < COMPARE_svm_kernel_size.bin

  # Restore the chosen kernel's files under their unprefixed names ...
  for (( i = 0; i < train_num_subj; i++ )); do
    /bin/cp "${optimal_svm_kernel}_order_${i}.bin" "order_${i}.bin"
    for (( j = 0; j < f_num_feature; j++ )); do
      /bin/cp "${optimal_svm_kernel}_svm_model_${i}_${j}.bin" "svm_model_${i}_${j}.bin"
    done
  done

  # ... and delete the prefixed copies of every kernel size that was tried.
  while [ "$start_svm_k" -le "$end_svm_k" ]; do
    svm_k=$(( start_svm_k / 100 ))
    for (( i = 0; i < train_num_subj; i++ )); do
      /bin/rm "${svm_k}_order_${i}.bin"
      for (( j = 0; j < f_num_feature; j++ )); do
        /bin/rm "${svm_k}_svm_model_${i}_${j}.bin"
      done
    done
    start_svm_k=$(( start_svm_k + 100 ))
  done

  /bin/rm r_roi_train* r_roi_test*

  # The classification-rate file of the chosen kernel becomes the result file.
  svm_k=$(( optimal_svm_kernel * 100 ))
  old_name="classification_rate_${svm_k}.bin"
  /bin/mv "$old_name" "$result"

  # Rate files of other kernel sizes only exist when a range was searched.
  if [ "$svm_kernel1" -gt "$old_svm_kernel" ]; then
    /bin/rm classification_rate_*.bin
  fi
  /bin/rm ranking_rate_*.bin

  start_id=0
  end_id=$(( train_num_feat - 1 ))

  if [ "$spatial_map" != "0" ]; then
    # Check for the header tool before doing any work, instead of finding
    # out it is missing after map files have already been removed/renamed.
    command -v makeNiftiHeader.py > /dev/null
    checkandexit $? "Need makeNiftiHeader.py to make headers for spatial maps!"

    echo " computing group difference maps ..."
    for (( j = 0; j < f_num_feature; j++ )); do
      # Per-subject maps for feature-range entry j.
      for (( i = 0; i < train_num_subj; i++ )); do
        COMPARE_SVM_discriminate "svm_model_${i}_${j}.bin" COMPARE_diff.txt > a.log
        checkandexit $? "Error discriminationg between classes: $(tail -n 1 a.log)!"
        COMPARE_map_svm_diff_to_region "COMPARE_spatial_map_${i}" "r_roi_feature_location_${i}.txt" \
          "order_${i}.bin" COMPARE_diff.txt "$start_id" "$end_id" "w_roi_region_${i}" \
          "$xdim" "$ydim" "$zdim" > a.log
        checkandexit $? "Error creating group difference map: $(tail -n 1 a.log)!"
      done

      # Combine the per-subject maps into one map per feature index.
      for (( i = 0; i <= end_id; i++ )); do
        COMPARE_float_sum "${spatial_map}_${i}.img" COMPARE_spatial_map "diff_${i}.img" 0 \
          "$train_num_subj" "$xdim" "$ydim" "$zdim" > a.log
        checkandexit $? "Error summing spatial maps: $(tail -n 1 a.log)!"
      done

      # below is to average the score maps: the first pass seeds the tmp_
      # accumulators, later passes are folded in by COMPARE_float_sum_m.
      if [ "$j" = "0" ]; then
        for (( i = 0; i <= end_id; i++ )); do
          /bin/cp "${spatial_map}_${i}.img" "tmp_${spatial_map}_${i}.img"
        done
      else
        for (( i = 0; i <= end_id; i++ )); do
          COMPARE_float_sum_m "${spatial_map}_${i}.img" "tmp_${spatial_map}_${i}.img" \
            "$xdim" "$ydim" "$zdim" > a.log
          checkandexit $? "Error summing spatial maps: $(tail -n 1 a.log)!"
        done
      fi
    done

    # Replace each map with its accumulated version and give it a header
    # based on the template header path in $ahdr.
    for (( i = 0; i <= end_id; i++ )); do
      /bin/rm "${spatial_map}_${i}.img"
      /bin/mv "tmp_${spatial_map}_${i}.img" "${spatial_map}_${i}.img"
      makeNiftiHeader.py -d "${spatial_map}_${i}.img" -b 4 -c "$ahdr"
    done
    /bin/rm COMPARE_spatial_map_* COMPARE_diff.txt
  fi

fi

# ---------------------------------------------------------------------------
# Testing phase - runs when a test list is set (via -t, or generated by -T).
# Every trained model is applied to the test subjects and the resulting
# scores are averaged into test_<result_file>.
# ---------------------------------------------------------------------------
if [ "$test_name" != "0" ]; then

  echo " classifying testing subjects ..."
  #test models on test data set

  # Read the six test parameters, one per line, from test_para.txt: root
  # directory, number of subjects, number of features and x/y/z dimensions.
  test_para="test_para.txt"
  if [ -e "$test_para" ]; then
    {
      read -r test_root_dir
      read -r test_num_subj
      read -r test_num_feat
      read -r xdim
      read -r ydim
      read -r zdim
    } < "$test_para"
  else
    echo "Error: could not open input for testing!" 1>&2
    exit 1
  fi

  tnum_subject=$test_num_subj

  # f_num_feature is normally set by the training phase. When training was
  # skipped (-p) it would be unset and every loop test below would fail, so
  # recompute it with the same expression the training phase uses.
  if [ -z "${f_num_feature:-}" ]; then
    f_num_feature=$(( num_f_end - num_f_start + 1 ))
  fi

  # Loop-invariant values; test_result_file is also needed after the loop.
  base_dir=$(pwd)
  test_result_file="test_${result}"

  for (( j = 0; j < f_num_feature; j++ )); do
    # Score the test subjects with each of the train_num_subj models.
    for (( i = 0; i < train_num_subj; i++ )); do
      feature_location_file="${base_dir}/r_roi_feature_location_${i}.txt"
      region_file="${base_dir}/r_roi_region_${i}"
      feature_order="${base_dir}/order_${i}.bin"
      svm_model_file="${base_dir}/svm_model_${i}_${j}.bin"

      COMPARE_model "$test_name" "$region_file" "$feature_location_file" "$feature_order" "$svm_model_file" > a.log
      checkandexit $? "Error classifying testing subjects: $(tail -n 1 a.log)!"

      # COMPARE_model leaves its scores in svm_score.bin; keep one per model.
      test_score_file="test_score_file_${i}.txt"
      /bin/mv svm_score.bin "$test_score_file"
    done

    COMPARE_average_score test_score_file "$train_num_subj" "$tnum_subject" "$test_result_file" > a.log
    checkandexit $? "Error averaging testing scores: $(tail -n 1 a.log)!"

    # First pass seeds the accumulator, later passes are added to it.
    if [ "$j" = "0" ]; then
      /bin/cp "$test_result_file" "tmp_${test_result_file}"
    else
      COMPARE_score_sum "$test_result_file" "tmp_${test_result_file}" "$tnum_subject"
      # This command does not write a.log, so the message no longer quotes
      # the (stale) last line of another command's log.
      checkandexit $? "Error summing test scores!"
    fi
  done

  COMPARE_score_mean "$test_result_file" "tmp_${test_result_file}" "$tnum_subject" "$f_num_feature"
  checkandexit $? "Error computing test mean!"

  /bin/rm "tmp_${test_result_file}" test_score_file_*.txt test_sub* test_para.txt
fi

# Save the trained model when -m was given: record the number of training
# subjects, bundle the feature, region, ordering and SVM model files into a
# tar archive, compress it and move it to the requested name.
# $model_name is quoted so a name containing whitespace neither breaks the
# test nor is split into several mv arguments.
# Note: the r_roi_feature*.txt glob already matches r_roi_feature_num.txt,
# which is also listed explicitly (kept as in the original command line).
if [ "$model_name" != "0" ]; then
  echo "$train_num_subj" > r_roi_feature_num.txt
  echo " saving model ..."
  tar -cf model.tar r_roi_feature*.txt r_roi_region*.bin order_*.bin svm_model*.bin r_roi_feature_num.txt train_para.txt COMPARE_f_num_feature.bin
  gzip -f9 model.tar
  /bin/mv model.tar.gz "$model_name"
fi

# Remove the intermediate files left in the working directory. rm is run
# without -f, so a pattern that matches nothing only prints a complaint and
# the script carries on.
/bin/rm r_roi_feature_*.txt r_roi_region_*.bin order_*.bin svm_model_*.bin
/bin/rm w_roi_region_*.bin w_roi_feature_location_*.txt w_roi_t*.txt
/bin/rm COMPARE_f_num_feature.bin
/bin/rm a.log
/bin/rm train_sub* train_para.txt

echo " finished. "

exit 0
