#!/usr/bin/perl

###################################################################################################################
#                                                                                                                 #
#    This is the main file to generate SVM model for WML segmentation using multispectral MRI (T1/T2/PD/FLAIR)    #
#    last modified: Oct 6, 2007                                                                                   #
#    Author: Zhiqiang Lao                                                                                         #
#    Comments: Zhiqiang.Lao@uphs.upenn.edu                                                                        #
#                                                                                                                 #
###################################################################################################################

# Both positional arguments are required: $ARGV[0] is the training list and
# $ARGV[1] is the model name used to build every model file name below.
$argc = @ARGV;
&show_usage if ( $argc < 2 );

############################################################
#          Setting up environment variables                #
############################################################
# -p<path> names the package installation directory.  Its bin/, scripts/
# and dependencies/ sub-directories are appended to the inherited PATH so
# the external tools invoked below can be found.
$sys_path = $ENV{'PATH'};
print "system path: $sys_path\n";
$package_path = &my_get_string("-p", *ARGV);
print "package path: $package_path\n";
$bin_path = "$package_path" . "/bin/";
$script_path = "$package_path" . "/scripts/";
$dependencies_path = "$package_path" . "/dependencies/";
# Only defined values are interpolated here: an undefined $PATH used to be
# part of this string, which produced a leading empty PATH entry (i.e. the
# current directory was searched first).
$ENV{PATH}="$sys_path:$bin_path:$script_path:$dependencies_path";

########################################
#       Reading input parameters       #
########################################
# Built-in defaults, used when the matching option is absent from @ARGV.
# They are package arrays because the option parsers take typeglobs.
@default_dim       = (256,256,46);
@default_res       = (0.9375,0.9375,3.0);
@default_weight    = (1.0,1.0,1.0,1.0);
@default_threshold = (0.9,0.1);

# List-valued options (comma separated, attached to the option letter).
@dim = (); @res = (); @w = ();
@dim       = &my_get_string_list_with_default("-d", *ARGV, *default_dim);
@res       = &my_get_string_list_with_default("-v", *ARGV, *default_res);
@w         = &my_get_string_list_with_default("-w", *ARGV, *default_weight);
@threshold = &my_get_string_list_with_default("-t", *ARGV, *default_threshold);

# Scalar options.
$radius      = &my_get_string_with_default("-r", *ARGV, "1");
$max_iter    = &my_get_string_with_default("-i", *ARGV, "10");
$use_premask = &my_get_string_with_default("-M", *ARGV, "0");

# The -P option is not parsed: intermediate results always go below the
# package directory.
$temp_path = "$package_path" . "/tmp/";

# Echo the effective settings.
print "training image dimension:  (" . join(", ", @dim[0..2]) . ")\n";
print "training image resolution: (" . join(", ", @res[0..2]) . ")\n";
print "neighbor size: $radius\n";
print "max iter number: $max_iter\n";
print "weight: (" . join(", ", @w[0..3]) . ")\n";
print "use_premask: $use_premask\n";
print "path to put temporary result: $temp_path\n";

$max_iter -= 1;

# Every kind of training input is read from the same sample directory.
$t1path = "$package_path" . "/sample/train/";
$t2path = $pdpath = $flpath = $t1path;
$manual_segmentation_path = $manual_segmentation_open_path = $lesion_premask_path = $t1path;

print "t1path:  $t1path\n",
      "t2path:  $t2path\n",
      "pdpath:  $pdpath\n",
      "flpath:  $flpath\n";
print "manual_segmentation_path:  $manual_segmentation_path\n",
      "manual_segmentation_open_path:  $manual_segmentation_open_path\n";
print "lesion_premask_path:  $lesion_premask_path\n";

# Remember the directory the script was started from; the final model is
# copied back there when training ends.
open(CURRENT_DIR, "pwd|") or die "cannot run pwd: $!";
$current_dir = <CURRENT_DIR>;
close CURRENT_DIR;
chomp($current_dir);

# Read the training list ($ARGV[0]): one subject identifier per line.
# chomp (not chop) is used so that a last line without a trailing newline
# does not lose its final character; trailing whitespace (including a
# carriage return) is stripped and blank lines are skipped.
@subs = (); $ind = 0;
open(LIST, '<', $ARGV[0]) or die "cannot open training list '$ARGV[0]': $!";
while ($rec = <LIST>) {
  chomp($rec);
  $rec =~ s/\s+$//;
  next if ($rec eq "");
  $subs[$ind] = $rec;
  $ind = $ind + 1;
}
close LIST;
print "subs: @subs\n";
die "training list '$ARGV[0]' contains no subjects\n" if ($ind == 0);

# All intermediate files are produced inside a randomly named sub-folder of
# $temp_path.  An already existing folder is tolerated, but working without
# one is not: every later step writes relative to the current directory.
$temp_sub_folder = int(100000*rand ());
$temp_folder = "$temp_path\/$temp_sub_folder";
unless (mkdir($temp_folder, 0755)) {
  die "cannot create temporary folder '$temp_folder': $!" unless (-d $temp_folder);
}
chdir("$temp_folder\/") or die "cannot change into temporary folder '$temp_folder': $!";

#############################################
#       Read input & define variables       #
#############################################
# Option strings shared by the external tools called later on.
$DimPara2D  = "-d$dim[0],$dim[1]";
$DimPara3D  = "-d$dim[0],$dim[1],$dim[2]";
$WeightPara = "-w$w[0],$w[1],$w[2],$w[3]";

@T1Files = (); @T2Files = (); @PDFiles = (); @FLFiles = ();
@ManualMaskFile = (); @ManualMaskFile_open = (); @LesionPremaskFile = ();

# One row per kind of input: source directory, file-name suffix and the
# array that records the local (copied) file name of each subject.
my @staging_table = (
  [ $t1path,                        ".T1.byte.cbq.match.smooth.img", \@T1Files ],
  [ $t2path,                        ".T2.byte.cbq.match.smooth.img", \@T2Files ],
  [ $pdpath,                        ".PD.byte.cbq.match.smooth.img", \@PDFiles ],
  [ $flpath,                        ".FL.byte.cbq.match.smooth.img", \@FLFiles ],
  [ $manual_segmentation_path,      ".lesion.mask.img",              \@ManualMaskFile ],
  [ $manual_segmentation_open_path, ".lesion.mask.open.img",         \@ManualMaskFile_open ],
  [ $lesion_premask_path,           ".lesion.premask.img",           \@LesionPremaskFile ],
);

# Copy every input of every subject into the current (temporary) folder.
for my $s (0 .. $ind - 1) {
  for my $row (@staging_table) {
    my ($source_dir, $suffix, $local_names) = @$row;
    my $local_name = "$subs[$s]$suffix";
    system("cp $source_dir/$local_name .");
    $local_names->[$s] = $local_name;
  }
}
print "total train subject number: $ind\n";

print "sub:  @subs\n";
print "T1Files: @T1Files\n",
      "T2Files: @T2Files\n",
      "PDFiles: @PDFiles\n",
      "FLFiles: @FLFiles\n",
      "ManualMaskFile: @ManualMaskFile\n";
print "ManualMaskFile_open: @ManualMaskFile_open\n",
      "LesionPremaskFile: @LesionPremaskFile\n";

# Mean volume figure of the manual masks; passed as -n to the feature
# selection tool when the initial model is built.
$average_manual_segmentation_voxel_number = &get_manual_segmentation_voxel_number(*ManualMaskFile, *dim);
print "average_manual_segmentation_voxel_number: $average_manual_segmentation_voxel_number\n";

#########################################################
#        write the list files of the manual masks       #
#########################################################
# manual_mask.lst holds one manual lesion mask per line and is handed to
# WMLGetAutoManualMaskDiff during refinement; manual_mask_dilate.lst holds
# the matching "open" masks.

open MANUAL_MASK, ">manual_mask.lst";
print MANUAL_MASK map { "$_\n" } @ManualMaskFile[0 .. $ind - 1];
close MANUAL_MASK;

open MANUAL_MASK_OPEN, ">manual_mask_dilate.lst";
print MANUAL_MASK_OPEN map { "$_\n" } @ManualMaskFile_open[0 .. $ind - 1];
close MANUAL_MASK_OPEN;

##################################
#   Build up initial SVM model   #
##################################
@cmd_lesion = ();@cmd_nonlesion = ();

# For every subject run the feature selection tool twice on the manual mask:
# once with -t0 (output <subject>.lesion.vec.0) and once with -t1 (output
# <subject>.nonlesion.vec.0).  Any failing command aborts the script.
for my $s (0 .. $ind - 1) {
  $ImgFiles = join(' ', $T1Files[$s], $T2Files[$s], $PDFiles[$s], $FLFiles[$s]);
  foreach my $sample_kind ( [ 'lesion', '-t0' ], [ 'nonlesion', '-t1' ] ) {
    my ($label, $type_flag) = @$sample_kind;
    $cmd = "WMLGetSelectedFeature $ImgFiles $ManualMaskFile[$s] $subs[$s].$label.vec.0 $DimPara2D $type_flag -m1 -n$average_manual_segmentation_voxel_number $WeightPara -r$radius";
    system($cmd) == 0 or die "$cmd failed: $?";
  }
}
print "total subject number: $ind\n";

# Assemble train.example from the iteration-0 vectors and train model ".0".
$iter = 0;
$iterPlus1 = $iter + 1;
&create_training_file(*subs, $iterPlus1);
# Earlier option set, kept for reference:
#   SVMTorch -eps 0.0002 -std 2 -c 6000 train.example $ARGV[1].0
$cmd = "SVMTorch -e 0.0002 -t 2 -std 6000 train.example $ARGV[1].0";
system($cmd) == 0 or die "$cmd failed: $?";
print "$ARGV[1].0 created\n";

#############################################################################
#                       Iteratively refine SVM model                        #
#############################################################################
# Each pass of the loop below
#   1. applies the model of the previous iteration to every training subject,
#   2. lowers a threshold from 1.0 in steps of 0.1 until the first figure in
#      err.dat reaches $accuracy (or the threshold range is exhausted),
#   3. stops when the two error figures satisfy the -t thresholds or the
#      iteration budget is used up; otherwise
#   4. derives additional non-lesion training vectors from the automatic
#      masks and retrains the model.
# On termination the model of the last completed iteration is copied to
# $current_dir/$ARGV[1] and the temporary folder is cleaned up.
# NOTE(review): both normal termination paths exit with a non-zero status
# (1 and 2).  This is kept unchanged because callers may depend on it.

# The refinement budget is capped regardless of the -i option.
if ($max_iter>2) { $max_iter = 2; }

open(PROGRESS, '>', "progress.dat") or die "cannot create progress.dat: $!";
print PROGRESS "threshold\tTPVF\tFPVF\n=================================\n";
close PROGRESS;
# $accuracy is the target for the first error figure and is relaxed by 0.02
# after every pass.  $svm_error is only updated here; nothing in this file
# reads it.
$accuracy = 0.9;$svm_error = 0.005;
while (1) {

print "start of WHILE\n";

  $pre_iter = $iter;  $iter++;
  print "*****************************iteration $iter******************************************\n";

#############################################################################
#     Generate evaluation map using SVM model from previous iteration       #
#############################################################################
  for ($i=0; $i<$ind; $i++) {
    $ImgFiles = "$T1Files[$i] $T2Files[$i] $PDFiles[$i] $FLFiles[$i]";
    $cmd = "WMLTestSingleImageKeepAllFunctionValue_NoSmooth $ImgFiles $ARGV[1].$pre_iter $subs[$i].WML.mask.img.allvalue.$pre_iter $DimPara2D -m1 $WeightPara -r$radius -M$LesionPremaskFile[$i]";
    system($cmd) == 0 or die "$cmd failed: $?";
    $cmd = "InvertImgFloat $subs[$i].WML.mask.img.allvalue.$pre_iter $subs[$i].WML.mask.img.allvalue.invert.$pre_iter $DimPara2D";
    system($cmd) == 0 or die "$cmd failed: $?";
  }
  print "after get emap based on SVM model from previous iteration\n";

#############################################################################
#         threshold evaluation map to get binary lesion masks               #
#############################################################################
  # $err[0] and $err[1] are the first two fields of err.dat; they are logged
  # to progress.dat under the TPVF and FPVF columns.
  $selected_threshold = 0.0;  $it = 1.0;  @err = (0.0,0.0,0.0);
  while (($err[0] < $accuracy) && ($it>-0.9)) {

    print "threshold: $it\n";
    for ($i=0; $i<$ind; $i++) {
      $cmd = "OneImgOp $subs[$i].WML.mask.img.allvalue.invert.$pre_iter $subs[$i].WML.mask.img.$pre_iter -t$it -m0 -f1";
      system($cmd) == 0 or die "$cmd failed: $?";
    }

    # Step down now; the threshold actually applied above is $it + 0.1.
    $it = $it - 0.1;

    # The list must be completely written and closed before the comparison
    # tool reads it.
    open(AUTO_MASK_LST, '>', "auto_mask.lst") or die "cannot create auto_mask.lst: $!";
    for ($i=0; $i<$ind; $i++) { print AUTO_MASK_LST "$subs[$i].WML.mask.img.$pre_iter\n"; }
    close(AUTO_MASK_LST) or die "cannot write auto_mask.lst: $!";
    $cmd = "WMLGetAutoManualMaskDiff manual_mask.lst auto_mask.lst err.dat $DimPara3D -r$res[0],$res[1],$res[2]";
    system($cmd) == 0 or die "$cmd failed: $?";

    open(ERR, '<', "err.dat") or die "cannot read err.dat: $!";
    $rec=<ERR>;
    close ERR;
    @err = split(' ', $rec);
  }
  $accuracy = $accuracy - 0.02;
  print "after threshold evaluation map to get binary lesion masks\n";

  $selected_threshold = $it + 0.1;
  open(PROGRESS, '>>', "progress.dat") or die "cannot append to progress.dat: $!";
  print PROGRESS "$selected_threshold\t$err[0]\t$err[1]\n";
  close PROGRESS;
  print "after writing PROGRESS\n";

  # Finished: either both error figures meet the -t thresholds or the
  # iteration budget is spent.  The model of the previous iteration is final.
  if (($err[0]>$threshold[0] && $err[1]<$threshold[1]) || ($iter>$max_iter)) {
    print "going to exit...\n";
    $iter = $iter - 1;
    $cmd = "cp $temp_folder\/$ARGV[1].$iter $current_dir\/$ARGV[1]"; system($cmd) == 0 or die "$cmd failed: $?";
    clean_intermediate_files($current_dir, $temp_folder, $ARGV[1]);
    exit(1);
  }

  # Build the non-lesion vectors of this iteration from the automatic masks.
  # NOTE(review): presumably these are voxels the current model marks as
  # lesion outside the manual "open" mask -- confirm against the tools.
  for ($i=0; $i<$ind; $i++) {
    $ImgFiles = "$T1Files[$i] $T2Files[$i] $PDFiles[$i] $FLFiles[$i]";
    $cmd = "OpeningCloseingRegiongrowing3DBinaryImg_special $subs[$i].WML.mask.img.$pre_iter $subs[$i].WML.mask.img.$pre_iter.rg -t2 -v$res[2] -n1 -m1 -g1658";
    system($cmd) == 0 or die "$cmd failed: $?";
    $cmd = "MaskOutImg $subs[$i].WML.mask.img.$pre_iter.rg $ManualMaskFile_open[$i] $subs[$i].WML.mask.nonlesion.diff $DimPara3D -t2 -T0.08";
    system($cmd) == 0 or die "$cmd failed: $?";
    $cmd = "WMLGetSelectedFeature $ImgFiles $subs[$i].WML.mask.nonlesion.diff $subs[$i].nonlesion.vec.$iter $DimPara2D -t0 -m1 $WeightPara -r$radius -S";
    system($cmd) == 0 or die "$cmd failed: $?";
  }

  # Count the vectors gained in this iteration over all subjects.
  for ($i=0; $i<$ind; $i++) {
    system("cat $subs[$i].nonlesion.vec.$iter >> delta.nonlesion.dat");
  }
  $current_total_nonlesion_number = &get_file_linenumber("delta.nonlesion.dat");
  system("\\rm delta.nonlesion.dat") == 0 or die "rm failed: $?";

  print "current_total_nonlesion_number: $current_total_nonlesion_number\n";
  # Fewer than 50 new vectors: retraining is skipped and the previous model
  # is final.
  if ($current_total_nonlesion_number < 50) {
    print "no further nonlesion tissue need to be selected\n";
    $iter = $iter - 1;
    $cmd = "cp $temp_folder\/$ARGV[1].$iter $current_dir\/$ARGV[1]"; system($cmd) == 0 or die "$cmd failed: $?";
    clean_intermediate_files($current_dir, $temp_folder, $ARGV[1]);
    exit(2);
  }

  # Retrain on the lesion vectors plus all non-lesion vectors gathered so far
  # (iterations 0 .. $iter).
  $iterPlus1 = $iter + 1;
  &create_training_file(*subs, $iterPlus1);

# Earlier option set, kept for reference:
# $cmd = "SVMTorch -eps 0.0005 -std 2 -c 6000 train.example $ARGV[1].$iter";
  $cmd = "SVMTorch -e 0.0005 -t 2 -std 6000 train.example $ARGV[1].$iter";
  system($cmd) == 0 or die "$cmd failed: $?";
  unlink("train.example");
  $svm_error = $svm_error + 0.005;

  print "$ARGV[1].$iter created\n";
}


# ***************************************************************************
# call format: clean_intermediate_files($current_path, $temp_path, $model_prefix)
#
# Changes back to $current_path and then removes the temporary working
# folder $temp_path with everything in it.  $model_prefix is accepted for
# interface compatibility but is not used.
# Dies if the directory change or the removal fails, or if $temp_path is
# empty or the file system root.
# ***************************************************************************
sub clean_intermediate_files
{
  my($lcurrent_path, $ltemp_path, $lmdl_prefix) = @_;

  # Guard the recursive removal against an empty path or "/".
  if (!defined($ltemp_path) || $ltemp_path =~ m{^\s*/*\s*$}) {
    die "clean_intermediate_files: refusing to remove '" . (defined($ltemp_path) ? $ltemp_path : '') . "'\n";
  }

  # Leave the temporary folder before deleting it.
  chdir($lcurrent_path) or die "chdir $lcurrent_path failed: $!";

  # List form: the path is handed to rm verbatim, without a shell.
  my(@lcmd) = ("rm", "-fr", $ltemp_path);
  system(@lcmd) == 0 or die "@lcmd failed: $?";
}


# ***************************************************************************
# call format: create_training_file(*sub, $iter)
#
# Builds train.example in the current directory from
#   <subject>.lesion.vec.0            for every subject, and
#   <subject>.nonlesion.vec.<t>       for every subject and t = 0 .. $iter-1.
# The concatenation is passed through RemoveDuplicateElementFromLists.pl and
# written out preceded by a header line "<line count>  <column count>".
# Missing vector files are skipped with a warning.  Dies if a file cannot be
# written or the de-duplication step produces no output.
# ***************************************************************************
sub create_training_file
  {
    print "SUB create_training_file\n";

    local(*lsub) = $_[0];
    my($liter) = $_[1];

    my($i, $t, $total_line_number, $feature_length, $rec, $vec_file);

    open(TRAIN_EXAMPLE, '>', "temp.example") or die "cannot create temp.example: $!";

    # Lesion samples always come from iteration 0.
    for ($i=0; $i<$#lsub+1; $i++) {
      $vec_file = "$lsub[$i].lesion.vec.0";
      if (open(TMP_LESION, '<', $vec_file)) {
        while ($rec=<TMP_LESION>) {
          print TRAIN_EXAMPLE "$rec";
        }
        close TMP_LESION;
      } else {
        warn "create_training_file: cannot read $vec_file: $!\n";
      }
    }

    # Non-lesion samples accumulate over all iterations so far.
    for ($t=0; $t<$liter; $t++) {
      for ($i=0; $i<$#lsub+1; $i++) {
        $vec_file = "$lsub[$i].nonlesion.vec.$t";
        if (open(TMP_NONLESION, '<', $vec_file)) {
          while ($rec=<TMP_NONLESION>) {
            print TRAIN_EXAMPLE "$rec";
          }
          close TMP_NONLESION;
        } else {
          warn "create_training_file: cannot read $vec_file: $!\n";
        }
      }
    }
    close(TRAIN_EXAMPLE) or die "cannot write temp.example: $!";

    print "Before RemoveDuplicateElementFromLists...\n";

    # The helper's exit status convention is not known here, so success is
    # judged by the presence of its output file.
    unlink("temp.example.nodup");
    system("RemoveDuplicateElementFromLists.pl temp.example temp.example.nodup");
    die "RemoveDuplicateElementFromLists.pl produced no temp.example.nodup\n"
      unless (-e "temp.example.nodup");
    $total_line_number = &get_file_linenumber("temp.example.nodup");
    $feature_length = &get_file_columnnumber("temp.example.nodup");

    print "After RemoveDuplicateElementFromLists...\n";

    # Header line first, then the de-duplicated samples.
    open(TRAIN_EXAMPLE_HERE, '>', "train.example") or die "cannot create train.example: $!";
    print TRAIN_EXAMPLE_HERE "$total_line_number        $feature_length\n";
    open(TEMP_EXAMPLE, '<', "temp.example.nodup") or die "cannot read temp.example.nodup: $!";
    for ($i=0; $i<$total_line_number; $i++) {
      $rec=<TEMP_EXAMPLE>;
      print TRAIN_EXAMPLE_HERE "$rec";
    }
    close TEMP_EXAMPLE;
    close(TRAIN_EXAMPLE_HERE) or die "cannot write train.example: $!";
    unlink("temp.example");
    unlink("temp.example.nodup");
  }


# ***************************************************************************
# call format: get_manual_segmentation_voxel_number(*mask_list, *dim)
#
# Runs GetWholeImageVolume on every mask in @mask_list (image dimensions
# taken from the first three entries of @dim), reads the first line of the
# tool's output file tmp.vol for each mask and returns the mean of those
# values.  Returns 0 for an empty list.  Dies if the tool leaves no tmp.vol.
# ***************************************************************************
sub get_manual_segmentation_voxel_number
  {
    # local() keeps the glob aliases confined to this call.
    local(*lmask_list) = $_[0];
    local(*ldim) = $_[1];

    my($i, $tmp_vol, $total_vol, $mask_count);

    $mask_count = scalar(@lmask_list);
    return 0 if ($mask_count == 0);   # nothing to average

    $total_vol = 0;
    for ($i=0; $i<$mask_count; $i++) {
      # Remove the previous result first so a failing tool run cannot be
      # mistaken for a fresh value.
      unlink("tmp.vol");
      system("GetWholeImageVolume $lmask_list[$i] tmp.vol -d$ldim[0],$ldim[1],$ldim[2]");
      open(TTT, '<', "tmp.vol")
        or die "GetWholeImageVolume produced no tmp.vol for $lmask_list[$i]: $!";
      $tmp_vol = <TTT>;
      close TTT;
      $total_vol = $total_vol + $tmp_vol;
    }

    unlink("tmp.vol");
    return ($total_vol/$mask_count);
  }

# ***************************************************************************
# call format: exec_cmd_parallel(*cmd, $number_process)
#
# Starts the first $number_process commands of @cmd at the same time, one
# child process per command, and waits for all of them.
# Returns the number of children that ended with a non-zero status
# (0 when every command succeeded).  Dies if fork fails.
# ***************************************************************************
sub exec_cmd_parallel
  {
    local(*llcmd) = $_[0];
    my($number_process) = $_[1];
    print "number of process: $number_process\n";

    # Echo the first command only.
    print "$llcmd[0]\n";

    # Never start more children than there are commands.
    $number_process = scalar(@llcmd) if ($number_process > scalar(@llcmd));

    my(@lkidpid, $p, $failed);
    @lkidpid = ();
    for ($p=0; $p<$number_process; $p++) {
      if (!defined($lkidpid[$p] = fork())) {
        die "cannot fork: $!";
      } elsif ($lkidpid[$p] == 0) {
        exec("$llcmd[$p]");
        # Only reached when exec itself failed: report it through a
        # non-zero exit status instead of ending the child silently.
        print STDERR "cannot exec '$llcmd[$p]': $!\n";
        exit(127);
      }
    }

    $failed = 0;
    for ($p=0; $p<$number_process; $p++) {
      waitpid($lkidpid[$p], 0);
      $failed++ if ($? != 0);
    }
    return $failed;
  }

# ******************************************************************************
# call format: exec_cmd_parallel_limited_cpu(*cmd, $number_process, $number_cpu)
#
# Runs the first $number_process commands of @cmd in consecutive batches of
# at most $number_cpu commands; each batch is run by exec_cmd_parallel and
# must finish before the next one starts.  A missing or non-positive
# $number_cpu is treated as 1.
# ******************************************************************************
sub exec_cmd_parallel_limited_cpu  {

  local(*lcmd) = $_[0];
  my($number_process, $number_cpu) = ($_[1], $_[2]);

  # A batch size below 1 would otherwise end in a modulus/division by zero.
  $number_cpu = 1 if (!defined($number_cpu) || $number_cpu < 1);

  # The batch is handed over as a typeglob, so it has to live in a package
  # array; local() restores the caller's view afterwards.
  local(@cmd_here);

  my($first, $last);
  for ($first = 0; $first < $number_process; $first += $number_cpu) {
    $last = $first + $number_cpu - 1;
    $last = $number_process - 1 if ($last > $number_process - 1);   # short final batch
    @cmd_here = @lcmd[$first .. $last];
    &exec_cmd_parallel(*cmd_here, scalar(@cmd_here));
  }
}


# ***************************************************************************
# call format: my_get_string_with_default($option, *argument_list, $default)
#
# Returns the text attached to the first argument that STARTS with $option
# (e.g. "-r3" yields "3" for option "-r"), or $default when no argument
# starts with it.  Arguments that merely contain the option text somewhere
# in the middle (such as a file name "wml-rmodel") are not matches.
# ***************************************************************************
sub my_get_string_with_default
  {
    my($option) = $_[0];
    local(*arg_list) = $_[1];
    my($default) = $_[2];

    my($count);
    for ($count = 0; $count <= $#arg_list; $count++)
      {
        # index() == 0 means "found at the very beginning".
        if (index($arg_list[$count], $option) == 0)
          {
            return (substr($arg_list[$count], length($option)));
          }
      }
    return $default;
  }

# **********************************************************************************
# call format: my_get_string($option, *argument_list)
#
# Returns the text attached to the first argument that STARTS with $option
# (e.g. "-p/opt/wml" yields "/opt/wml" for option "-p").  Returns the empty
# string when no argument starts with the option.
# **********************************************************************************
sub my_get_string
{
   my($option) = $_[0];
   local(*arg_list) = $_[1];

   my($count);
   for ($count = 0; $count <= $#arg_list; $count++)
   {
      # Prefix match only: an argument that contains the option text
      # elsewhere (for example a model named "model-p") is not an option.
      if (index($arg_list[$count], $option) == 0)
      {
         return (substr($arg_list[$count], length($option)));
      }
   }
   return '';
}

# **********************************************************************************
# call format: my_get_string_list_with_default($option, *argument_list, *default)
#
# Returns, as a list, the comma separated values attached to the first
# argument that STARTS with $option (e.g. "-d128,128,30" yields
# (128,128,30) for option "-d").  Returns the elements of @default when no
# argument starts with the option.
# **********************************************************************************
sub my_get_string_list_with_default
  {
   my($option) = $_[0];
   local(*arg_list) = $_[1];
   local(*default) = $_[2];

   my($count, @lst);
   for ($count = 0; $count <= $#arg_list; $count++)
     {
       # Prefix match only: a file name that happens to contain the option
       # text (for example "list-d.txt") must not be taken for the option.
       if (index($arg_list[$count], $option) == 0)
         {
           @lst = split(/,/, substr($arg_list[$count], length($option)));
           return (@lst);
         }
     }
   return (@default);
 }

# **********************************************************************************
# call format: get_file_linenumber(file)
#
# Returns the number of lines in the named file, or 0 when the file cannot
# be opened.
# **********************************************************************************
sub get_file_linenumber
  {
    my($file) = $_[0];
    my($tmp_count) = 0;

    # Three-argument open: the name is always treated as a plain file to
    # read.  local(*TMP) keeps a TMP handle of the caller untouched.
    local(*TMP);
    open(TMP, '<', $file) or return 0;
    while (defined(my $record = <TMP>))
      {
        $tmp_count = $tmp_count + 1;
      }
    close TMP;

    return $tmp_count;
  }

# **********************************************************************************
# call format: get_file_columnnumber(file)
#
# Returns the number of whitespace separated fields on the first line of the
# named file, or 0 when the file is empty or cannot be opened.
# **********************************************************************************
sub get_file_columnnumber
  {
    my($file) = $_[0];
    my($col_rec, @fields);

    # Three-argument open: the name is always treated as a plain file to
    # read.  local(*TMP) keeps a TMP handle of the caller untouched.
    local(*TMP);
    open(TMP, '<', $file) or return 0;
    $col_rec = <TMP>;
    close TMP;
    return 0 unless (defined($col_rec));

    # split(' ', ...) ignores leading whitespace and splits on runs of it.
    @fields = split(' ', $col_rec);
    return (scalar(@fields));
  }

# Prints the command line help and terminates the script with exit status 1.
# The option descriptions follow what the script actually does: option
# values must be attached to the option letter, -d takes three values, the
# refinement is capped, and -P is not parsed.
sub show_usage
{
  print "This program creates model file from a set of training set\n";
  print "for White Matter Lesion (WML) detection via SVM\n";
  print "\n\nUsage: $0 train_list model [options]\n\n";
  print " -d<int>,<int>,<int>  : dimension of input images (default: 256,256,46)\n";
  print " -v<float>,<float>,<float>  : image resolution (default: 0.9375,0.9375,3.0)\n";
  print " -t<float>,<float>    : threshold for TPVF and FPVF (default: 0.9,0.1)\n";
  print " -i<int>              : maximum iteration number (default: 10, values above 3 act like 3)\n";
  print " -r<int>              : neighbor size (default: 1)\n";
  print " -M<int>              : use premask or not 0: not use 1: use (default: not use)\n";
  print " -p<string>           : path where this package installed (required)\n";
  print " -P<string>           : currently ignored; intermediate results go to <package path>/tmp/\n";
  print " -w<float>,<float>,<float>,<float> : weighting parameters for t1, t2, pd, flair respectively (default: 1.0,1.0,1.0,1.0)\n";
  print "\n contact: Zhiqiang.Lao\@uphs.upenn.edu\n";
  exit(1);
}
