#!/usr/bin/env python
import os.path
import os
import sys
import getopt
import ConfigParser
import pyxel as PyXel
import getopt
import time
import tempfile
import numpy as np
import scipy as sp


# Maps the name of a meta-classifier to the name of the core classifier it
# wraps; FindBestClassifierParam uses it to look up the core classifier's
# parameters as well.
MetaCorrespondence = {
    'Bagging Logistic': 'Logistic',
    'Bagging SMO': 'SMO',
    'Boosted Simple Logistic': 'Simple Logistic',
}

# Name under which this script was invoked; substituted into the help text.
EXEC_NAME = sys.argv[0]

def usage():
    """Print the command-line help text of this wrapper script."""
    # The text is assembled line by line so that the lines consisting only of
    # whitespace ("  " and "\t") are visible in the source.
    helpLines = [
        "",
        "%(EXEC)s--",
        "This wrapper script executes individual steps of an experiment.",
        "",
        "  STEP 1:  Learn the best basis vectors",
        "  STEP 2:  Extract features and save into an .arff file",
        "  STEP 3:  Find the best parameters and save into .csv files",
        "  STEP 4:  Train the classifier with best parameters and apply to the training and testing data",
        "  STEP 5:  Summarize and make a report",
        "",
        "",
        "Usage: %(EXEC)s [options]",
        "",
        "Required Options:",
        "  [-c --configFile]             Specify config file.",
        "  [-s --perfSteps]              Specify step to be performed.",
        "  ",
        "Options:",
        "  [-v --verboseOnly]            Print commands on screen instead of executing on SGE.",
        "  [-u --continue]               Continue processing from a previously saved .mat file.",
        "  [-m --modeAve]                Specify the method for computing the average accuracy (default: 1)",
        "                                    1:  collect all data into one table and use it to compute the average accuracy",
        "                                    2:  compute the average on each fold then average the results. This mode will also report standard deviation.",
        "",
        "",
        "",
        "Examples:",
        "  %(EXEC)s -c  /sbia/comp_space/batmangn/Projects/NMFTV/results/exp1602_10foldCV/exp1602_10foldCV.config  -s 1",
        "    Perform steps 1 using experiment parameters in the specified config file.",
        "\t",
        "",
        "  ",
        "",
    ]
    helpText = "\n".join(helpLines)
    print(helpText % {'EXEC': EXEC_NAME})



def computeAccuracy(p):
    """Build the confusion matrix of a table of classification results.

    Parameters:
        p: table object offering ``column(header)`` and ``column(col=N)``.
           Each call is expected to return a pair whose first element is the
           sequence of subject ids and whose second element is the sequence
           of values.  The table must provide the columns
           'Class_Label-actual' and 'Class_Label-prediction'.

    Returns:
        A square ``numpy.matrix`` of floats holding raw counts (not
        percentages).  Entry [a, b] is the number of samples whose actual
        label has index a and whose predicted label has index b.  Labels are
        indexed in sorted order so the layout is reproducible.

    Raises:
        AssertionError: if the two columns list different subjects at the
            same position.
    """
    hdr_actual = 'Class_Label-actual'
    hdr_predic = 'Class_Label-prediction'
    actualLabels = p.column(hdr_actual)
    predictLabels = p.column(hdr_predic)

    # Index every label that can show up.  Predicted labels are included so
    # that a prediction of a class absent from the actual labels of this
    # table gets its own column instead of failing with a KeyError.
    labelSet = set(p.column(col=2)[1])   # column 2: the actual labels
    labelSet.update(actualLabels[1])
    labelSet.update(predictLabels[1])
    mapLabels = dict((label, idx) for idx, label in enumerate(sorted(labelSet)))

    numLabels = len(mapLabels)
    # np.matrix is the type the removed alias scipy.mat used to produce.
    confusionMatrix = np.matrix(np.zeros((numLabels, numLabels)))

    for cnt in range(len(actualLabels[0])):
        # Sanity check: both columns must refer to the same subject.  Raised
        # explicitly so the check also runs when Python is started with -O.
        if not (predictLabels[0][cnt] == actualLabels[0][cnt]):
            raise AssertionError("This event should NOT happend ever !!! Are you sure you are using the correct Pyxel version??? I am comparing labels of two different subjects !! ")
        pLabel = mapLabels[predictLabels[1][cnt]]     # predicted label
        aLabel = mapLabels[actualLabels[1][cnt]]      # actual label
        confusionMatrix[aLabel, pLabel] = confusionMatrix[aLabel, pLabel] + 1.0
    return confusionMatrix


 

# Summarize the test results of an experiment and append the report to the text file <resFile>.txt
def makeSummaryFile(title,csvTrainFile_List,csvTestFile_List,resFile,testHdrTemplate,aveMode):
    """Print a summary of the test results and append it to ``resFile + ".txt"``.

    Parameters:
        title: text inserted into the headline of the report.
        csvTrainFile_List: not used; kept so existing callers keep working.
        csvTestFile_List: names of the per-fold test result CSV files, each
            opened with ``PyXel.Pyxel(fn)``.
        resFile: path of the report without the ".txt" extension; the report
            is appended to an existing file.
        testHdrTemplate: not used; kept so existing callers keep working.
        aveMode: 1 = merge all test tables into one and compute a single
            confusion matrix from it; 2 = compute one confusion matrix per
            file and report the mean and standard deviation of the per-file
            accuracies.

    Raises:
        AssertionError: if aveMode is neither 1 nor 2 (checked before any
            file is read).
    """
    if aveMode not in (1, 2):
        raise AssertionError("unkown aveMode for the makeSummaryFile!! aveMode in the makeSummaryFile should be either 1 or 2 !!!!")

    # Load every test file once, remembering how many samples it holds.
    samplesPerFile = []
    perFileMatrices = []
    res_test = PyXel.Pyxel()
    for fn in csvTestFile_List:
        p = PyXel.Pyxel(fn)
        samplesPerFile.append(len(p.getIds()))
        if aveMode == 1:
            # mode 1: collect all folds into a single table
            res_test = res_test + p
            res_test.contract()
        else:
            # mode 2: one confusion matrix per fold
            perFileMatrices.append(computeAccuracy(p))

    # 'with' guarantees the report is closed even if formatting fails.
    with open(resFile + ".txt", "a+") as text_file:

        def report(line):
            # every report line goes both to the screen and to the file
            print(line)
            text_file.write(line + '\n')

        report(">>>>> Results of the testing for " + title + " <<<<<<< ")
        report("Here is (or list of) confusion matrix(es)  or its average depending on options chosen in the command line (notice that the numbers are NOT percentage but number of correctly classified samples) : ")

        if aveMode == 1:
            confusionMatrix = computeAccuracy(res_test)
            report("confusion Matrix  =  ")
            for row in confusionMatrix:
                report(str(row))
            report("  \n")
            # trace() yields a 1x1 matrix, so the printed value keeps its brackets
            report("This is the normalized trace of the confusion matrix : " + str(confusionMatrix.trace()/np.sum(samplesPerFile)))
        else:
            accList = []
            for cfMat, numSamplesInFile in zip(perFileMatrices, samplesPerFile):
                report("confusion Matrix  =  ")
                for row in cfMat:
                    report(str(row))
                report("  \n")
                accuracy = cfMat.trace()/numSamplesInFile
                report("This is the normalized trace of the confusion matrix : " + str(accuracy))
                accList.append(accuracy)
            report("Avearage Accuracy [std] : %f [%f] "%(np.mean(accList),np.std(accList)))
        
         


        


# Depending on the inputs, this either parses the csv file to find the best classifier and its parameters,
# or, if Name is given, returns the parameters recorded for that classifier.
def FindBestClassifierParam(BestParamCSVFN,hdr,Name=''):
    """Return a classifier name together with its recorded parameters.

    Parameters:
        BestParamCSVFN: CSV file produced by the parameter search; it is
            loaded with ``PyXel.Pyxel().load``.
        hdr: header of the column holding the accuracies.  The parameters are
            read from the column named 'Parameters (<hdr>)'.
        Name: classifier to look up.  When it is the empty string, the
            classifier with the highest accuracy in column ``hdr`` is chosen
            (the first one wins on ties).

    Returns:
        A tuple ``(best_cls, Param, extraParam)``.  ``extraParam`` holds the
        parameters of the core classifier when ``best_cls`` is a
        meta-classifier (its name contains 'Bag' or 'Boost'), otherwise ''.

    Raises:
        ValueError: if Name is '' and column ``hdr`` has no rows.
        KeyError: if a meta-classifier has no entry in MetaCorrespondence.
    """
    pyxel = PyXel.Pyxel()
    print("BestParamCSVFN : %s \n" % (BestParamCSVFN,))
    print("hdr: %s" % (hdr,))
    pyxel.load(BestParamCSVFN)
    paramhdr = 'Parameters (' + hdr + ')'
    res = pyxel.column(hdr)     # res[0]: classifier names, res[1]: accuracies
    print("res:  %s" % (res,))

    if Name == '':
        best_cls = None
        best_acc = None
        for candidate, accuracy in zip(res[0], res[1]):
            accuracy = float(accuracy)
            # Start from "no best yet" instead of 0 so that a table whose
            # accuracies are all 0 still yields a classifier.
            if best_acc is None or best_acc < accuracy:
                best_acc = accuracy
                best_cls = candidate
        if best_cls is None:
            raise ValueError("no classifier found under header '%s' in %s" % (hdr, BestParamCSVFN))
    else:
        best_cls = Name
    Param = pyxel.grab(best_cls,paramhdr)

    # check whether the best classifier is a meta-classifier
    if ('Bag' in best_cls) or ('Boost' in best_cls):
        core_cls = MetaCorrespondence[best_cls]     # get the core classifier
        extraParam = pyxel.grab(core_cls,paramhdr)
    else:
        extraParam = ''
    return best_cls, Param, extraParam

 

def _runAndGetJobId(cmdLine):
    """Run cmdLine through the shell and return the job id it reports.

    The job id is taken to be the third whitespace-separated token of the
    first line written to standard output (presumably the
    "Your job <id> ..." message of the SGE submit command -- confirm against
    the launcher scripts).

    Raises:
        RuntimeError: if the first output line has fewer than three tokens.
    """
    import subprocess   # local import: not part of the file-level imports

    # NOTE(review): the command is a shell string assembled from config file
    # values; it is only safe as long as the config file is trusted.
    proc = subprocess.Popen(cmdLine, shell=True,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            universal_newlines=True)
    # communicate() drains both pipes, so a chatty stderr cannot block the child
    stdOutput, stdError = proc.communicate()
    lines = stdOutput.splitlines()
    tokens = lines[0].split() if lines else []
    if len(tokens) < 3:
        raise RuntimeError("cannot read a job id from the output of: %s\nstdout: %r\nstderr: %r"
                           % (cmdLine, stdOutput, stdError))
    return tokens[2]


def _launchJob(cmdLine, verboseOnly, jobsList):
    """Run cmdLine and append its job id to jobsList, unless verboseOnly is set."""
    if not verboseOnly:
        jobsList.append(_runAndGetJobId(cmdLine))


def _featureExtractionCmd(execLauncherFn, execBinFn, expName, basisVectorFn, listFn, idFn, arffFn):
    """Return the feature-extraction command line for one image list."""
    template = ("%(launcherExec)s  "
                "%(execBinFn)s  "
                "what   FeatureExtr "
                "expnum %(expName)s   "
                "ImageList  %(listFn)s   "
                "ResFilename   %(basisVectorFn)s  "
                "IdListFile  %(idFn)s  "
                "ArffFileName  %(arffFn)s ")
    return template % {'launcherExec': execLauncherFn, 'execBinFn': execBinFn,
                       'expName': expName, 'basisVectorFn': basisVectorFn,
                       'listFn': listFn, 'idFn': idFn, 'arffFn': arffFn}


def main():
    """Parse the command line and run the requested steps of the experiment.

    Steps (option -s, comma separated):
        1: learn the basis vectors
        2: extract features into .arff files (training and testing)
        3: search the best classifier parameters and save them as .csv
        4: train/apply the classifiers with the best parameters
        5: summarize the test results into a report

    Returns:
        1 when a required option is missing, an option value is invalid or
        the config file cannot be read; None otherwise.  Exits with status 2
        on an unparsable command line and 0 after printing the help.
    """
    try:
        # "modeAve=" needs the trailing '=' so that --modeAve accepts a value
        opts, args = getopt.getopt(sys.argv[1:], "hc:s:vum:",
                                   ["help", "configFile=", "perfSteps=",
                                    "verboseOnly", "continue", "modeAve="])
    except getopt.GetoptError as err:
        usage()
        print(err)
        sys.exit(2)     # nothing sensible can be done without parsed options

    configFn = None
    stepList = None
    verboseOnlyFlag = False
    continueFlag = 'false'
    modeAve = 1
    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit(0)
        elif o in ("-c", "--configFile"):
            configFn = a
        elif o in ("-s", "--perfSteps"):
            stepList = a.split(',')
            print("step list :  %s" % (stepList,))
        elif o in ("-m", "--modeAve"):
            try:
                modeAve = int(float(a))
            except ValueError:
                modeAve = None      # reported as invalid just below
            if modeAve not in (1, 2):
                print("ERROR: modeAve must be either 1 or 2 !!! ")
                usage()
                return 1
        elif o in ("-v", "--verboseOnly"):
            verboseOnlyFlag = True
        elif o in ("-u", "--continue"):
            continueFlag = 'true'
        else:
            raise AssertionError("unhandled option")

    # both required options must be present (repeating one does not count)
    if configFn is None or stepList is None:
        usage()
        return 1

    # STEP 0: read the configuration file
    config = ConfigParser.ConfigParser()
    if not config.read(configFn):
        # read() skips unreadable files silently and returns what it parsed
        print("ERROR: cannot read config file %s" % (configFn,))
        return 1
    if 'SETTINGS' in config.sections():     # this configuration has a general setting
        # this field contains the list of experiments to perform
        sections_list = config.get('SETTINGS', 'sections').split(',')
    else:
        # without a SETTINGS section it is probably a simple config file
        if '5' in stepList:
            raise AssertionError("if you want to summerize your experiment, your config file must have a SETTINGS section!")
        sections_list = config.sections()

    # STEP 1: learn the best basis vectors
    if '1' in stepList:
        jobsList = []
        learnTemplate = ("%(launcherExec)s  "
                         "%(execBinFn)s  "
                         "what learn   "
                         "expnum %(expName)s   "
                         "continueFlag  %(continueFlag)s  "
                         "ImageList  %(trainingListFn)s   "
                         "ResFilename   %(basisVectorFn)s")
        for s in sections_list:
            cmdLine = learnTemplate % {
                'launcherExec': config.get(s, 'execLauncherFn'),
                'execBinFn': config.get(s, 'execBinFn'),
                'expName': config.get(s, 'expName'),
                'continueFlag': continueFlag,
                'trainingListFn': config.get(s, 'trainingListFn'),
                'basisVectorFn': config.get(s, 'basisVectorFn'),
            }
            print("cmdLine : %s" % (cmdLine,))
            _launchJob(cmdLine, verboseOnlyFlag, jobsList)

    # STEP 2: extract features and save them into arff files
    if '2' in stepList:
        jobsList = []
        for s in sections_list:
            # read everything first so a missing key fails before any job starts
            expName = config.get(s, 'expName')
            trainingIdFn = config.get(s, 'trainingIdFn')
            trainingListFn = config.get(s, 'trainingListFn')
            trainingArffFn = config.get(s, 'trainingArffFn')
            testingIdFn = config.get(s, 'testingIdFn')
            testingListFn = config.get(s, 'testingListFn')
            testingArffFn = config.get(s, 'testingArffFn')
            execLauncherFn = config.get(s, 'execLauncherFn')
            execBinFn = config.get(s, 'execBinFn')
            basisVectorFn = config.get(s, 'basisVectorFn')

            # features of the training images
            cmdLine = _featureExtractionCmd(execLauncherFn, execBinFn, expName, basisVectorFn,
                                            trainingListFn, trainingIdFn, trainingArffFn)
            print("cmdLine :  %s" % (cmdLine,))
            _launchJob(cmdLine, verboseOnlyFlag, jobsList)

            # features of the testing images
            cmdLine = _featureExtractionCmd(execLauncherFn, execBinFn, expName, basisVectorFn,
                                            testingListFn, testingIdFn, testingArffFn)
            print("cmdLine : %s" % (cmdLine,))
            _launchJob(cmdLine, verboseOnlyFlag, jobsList)

    # STEP 3: find the best parameters and save them into .csv files
    if '3' in stepList:
        jobsList = []
        for s in sections_list:
            trainingArffFn = config.get(s, 'trainingArffFn')
            paramSearchLauncherFn = config.get(s, 'paramSearchLauncherFn')
            paramSearchBinFn = config.get(s, 'paramSearchBinFn')
            bestParamCsvFn = config.get(s, 'bestParamCsvFn')
            className = config.get(s, 'className')
            searchOpt = config.get(s, 'searchOpt')
            classifierList = config.get(s, 'classifierList')
            cmdLine = '%s %s --arffFile=%s  --csvFile=%s  --nameClass="%s" %s  --listOfClassifiers="%s"' % \
                (paramSearchLauncherFn, paramSearchBinFn, trainingArffFn, bestParamCsvFn,
                 className, searchOpt, classifierList)
            print("cmdLine : %s" % (cmdLine,))
            _launchJob(cmdLine, verboseOnlyFlag, jobsList)

    # STEP 4: train the classifiers with the best parameters and apply them
    if '4' in stepList:
        jobsList = []
        for s in sections_list:
            trainingArffFn = config.get(s, 'trainingArffFn')
            testingArffFn = config.get(s, 'testingArffFn')
            classifierLauncherFn = config.get(s, 'classifierLauncherFn')
            classifierBinFn = config.get(s, 'classifierBinFn')
            classifierList = config.get(s, 'classifierList')
            bestParamCsvFn = config.get(s, 'bestParamCsvFn')
            className = config.get(s, 'className')
            trainHdrTemplate = config.get(s, 'trainHdrTemplate')
            testHdrTemplate = config.get(s, 'testHdrTemplate')
            trainResultTemplate = config.get(s, 'trainResultTemplate')
            testResultTemplate = config.get(s, 'testResultTemplate')
            removeOpt = config.get(s, 'removeOpt')
            hdrTrain = trainHdrTemplate % (s,)
            hdrTest = testHdrTemplate
            for classifierName in classifierList.split(','):
                best_cls, bestParam, extraParam = \
                    FindBestClassifierParam(bestParamCsvFn, className, classifierName)
                trainResultCsv = trainResultTemplate % (best_cls,)
                testResultCsv = testResultTemplate % (best_cls,)
                # The arguments are joined with single blanks; the shell
                # treats any run of whitespace as one separator.
                cmdParts = [
                    '%s %s' % (classifierLauncherFn, classifierBinFn),
                    '--trainArff="%s"' % (trainingArffFn,),
                    '--testArff="%s"' % (testingArffFn,),
                    # best_cls equals classifierName unless the name was
                    # empty, in which case it is the classifier that was found
                    '--bestClassifier="%s"' % (best_cls,),
                    '--bestParam="%s"' % (bestParam,),
                    '--extraParam="%s"' % (extraParam,),
                ]
                if removeOpt != '':
                    cmdParts.append('--removeLabel="%s"' % (removeOpt,))
                cmdParts.extend([
                    '--trainCSV="%s"' % (trainResultCsv,),
                    '--testCSV="%s"' % (testResultCsv,),
                    '--hdrTrain="%s"' % (hdrTrain,),
                    '--hdrTest="%s"' % (hdrTest,),
                ])
                cmdLine = ' '.join(cmdParts)
                print("cmdLine : %s" % (cmdLine,))
                _launchJob(cmdLine, verboseOnlyFlag, jobsList)

    # STEP 5: summarize and make a report
    if '5' in stepList:
        testHdrTemplate = config.get('SETTINGS', 'testHdrTemplate')
        classifierList = config.get('SETTINGS', 'classifierList')
        className = config.get('SETTINGS', 'className')
        SummaryFileTemplate = config.get('SETTINGS', 'SummaryFileTemplate')
        expName = config.get('SETTINGS', 'expName')
        for classifierName in classifierList.split(','):
            resFn = SummaryFileTemplate % (expName, classifierName)
            print("resFn:  %s" % (resFn,))
            print("sections_list : %s" % (sections_list,))
            csvTestFile_List = []
            csvTrainFile_List = []
            for s in sections_list:
                # result file of this section on the test data
                testResultTemplate = config.get(s, 'testResultTemplate')
                csvTestFile_List.append(testResultTemplate % (classifierName,))
                # result file of this section on the training data
                trainResultTemplate = config.get(s, 'trainResultTemplate')
                csvTrainFile_List.append(trainResultTemplate % (classifierName,))
            makeSummaryFile(className, csvTrainFile_List, csvTestFile_List, resFn, testHdrTemplate, modeAve)
   


if __name__ == '__main__':
    # Hand main()'s return value to the shell: it returns 1 on usage errors
    # and None (exit status 0) on success.
    sys.exit(main())

