import sys
import os as os
import getopt
import os.path

# Java general imports
import java.io.FileReader as FileReader
import java.lang.StringBuffer as StringBuffer
import java.lang.Boolean as Boolean
import java.lang.String as String

# Weka general imports
import weka.filters.Filter as Filter
import weka.core.Instances as Instances
import weka.core.Utils as Utils
import weka.core.AttributeStats as AttributeStats
import weka.classifiers.Evaluation as Evaluation
import weka.core.Range as Range
import weka.filters.unsupervised.instance.RemoveWithValues as RemoveWithValues
import weka.filters.unsupervised.attribute.Remove as AttributeRemove


# load modules to search for the best parameters
from smo_param_search import *
from logistic_param_search import *
from simplelogistic_param_search import *
from smobagging_param_search import *
from logisticbagging_param_search import *
from bayesian_param_search import *
from randomforest_param_search import *
from simplelogisticadaboost_param_search import *


# Name under which the script was started; shown in the help text.
EXEC_NAME = sys.argv[0]

def usage():
    """Print the command-line help text to standard output.

    The text is filled in with EXEC_NAME and describes every option that
    main() accepts, followed by two example invocations.
    """
    helpText = """
%(EXEC)s --
  This code looks for the best parameters for classification of ADNI-data.
  It produces the best parameters of the following classifiers:
        1. SMO
        2. Bagging of SMO
        3. Logistic
        4. Bagging of Logistic
        5. Simple Logistic
        6. AdaboostM1 over Simple-Logistic (currently disabled)
        7. Random Forest
        8. Naive Bayes



Usage: %(EXEC)s [options]

Options:
  [-h --help]                 Show this help text and exit
  [-a --arffFile]             Specify the input data file in ARFF format (MANDATORY to use)
  [-c --csvFile]              Specify the output csv file for the results; an existing file is replaced (MANDATORY to use)
  [-i --idFlag]               If it is used, it means that the last feature is IDs
  [-w --weightFlag]           If it is used, instances are weighted according to the number of samples in the corresponding class
  [-r --rmClass]              Class label index; all instances with this class label are removed from the data set
  [-n --nameClass]            Name of classification (MANDATORY to use)
  [-l --listOfClassifiers]    Comma-separated list of classifiers to run (Optional, default: all of them).
                              Accepted names: Logistic, Bagging Logistic, SMO, Bagging SMO,
                              Simple Logistic, Bayesian, Random Forest



Examples:
  %(EXEC)s -a "CV(3_10)-train-exp702-Features.arff"  -c  "CV(3_10)-train-exp702-BestParams.csv"  -n "AD vs NC"
    It works on the arff file and saves the results in the csv file

  %(EXEC)s -a "CV(3_10)-train-exp702-Features.arff"  -c  "CV(3_10)-train-exp702-BestParams.csv" -i -w -r 2   -n "AD vs NC"
    Same as above, but the last feature is treated as IDs, instances are weighted and class 2 is removed

  %(EXEC)s -a "CV(3_10)-train-exp702-Features.arff"  -c  "CV(3_10)-train-exp702-BestParams.csv"  -n "AD vs NC"  -l "SMO,Random Forest"
    It only searches the parameters of the listed classifiers


"""
    # Parenthesised single-argument print behaves the same as the print statement.
    print(helpText % {'EXEC': EXEC_NAME})


def PreprocessData(Data,option):
    """Prepare a Weka data set for the parameter searches.

    Steps, in order:
      1. if option['idFlag'] is set, drop the last attribute (the IDs);
      2. make the (new) last attribute the class attribute;
      3. if option['rmClassFlag'] is set, remove every instance whose class
         label index equals option['rmClass'] (the value is handed to
         RemoveWithValues.setNominalIndices);
      4. if option['weightFlag'] is set, give each instance a weight that
         compensates for the unequal number of instances per class.

    Parameters:
        Data   -- the loaded data set (weka.core.Instances).
        option -- dict with the keys 'idFlag', 'rmClassFlag', 'rmClass'
                  and 'weightFlag'.

    Returns:
        The preprocessed data set. When no class is removed this is the same
        object that came out of steps 1-2; instance weights are set in place.
    """
    if option['idFlag']:
        # the last attribute holds the IDs: remove it from the data set
        idRemover = AttributeRemove()
        idRemover.setInvertSelection(Boolean(False))
        idRemover.setAttributeIndices(String(str(Data.numAttributes())))
        idRemover.setInputFormat(Data)
        Data = Filter.useFilter(Data, idRemover)

    # set the class index - the index of the dependent variable
    Data.setClassIndex(Data.numAttributes() - 1)

    if option['rmClassFlag']:
        # instances with the specified class label must be removed
        classRemover = RemoveWithValues()
        classRemover.setAttributeIndex(String('last'))
        classRemover.setNominalIndices(String(str(option['rmClass'])))
        classRemover.setInputFormat(Data)
        newData = Filter.useFilter(Data, classRemover)
    else:
        newData = Data

    # Nothing to weight in an empty data set (and the ratios would be 0/0).
    if option['weightFlag'] and newData.numInstances() > 0:
        if Data.numClasses() == 2:
            _WeightTwoClasses(newData)
        elif Data.numClasses() > 2:
            _WeightSeveralClasses(Data, newData)
    return newData


def _WeightTwoClasses(dataset):
    """Weight each instance by the share of instances NOT in its own class."""
    classAttr = dataset.classAttribute()
    total = dataset.numInstances()
    # The numeric value of the class labels is not known up front, so the
    # class of the first instance is taken as the reference class.
    referenceValue = dataset.instance(0).value(classAttr)
    referenceCount = 0
    for idx in range(total):
        if dataset.instance(idx).value(classAttr) == referenceValue:
            referenceCount = referenceCount + 1
    otherCount = total - referenceCount
    if otherCount == 0:
        # Only one class is present: the formula would set every weight to
        # zero, so the existing weights are left untouched.
        return
    referenceWeight = otherCount / float(total)
    otherWeight = referenceCount / float(total)
    for idx in range(total):
        inst = dataset.instance(idx)
        if inst.value(classAttr) == referenceValue:
            inst.setWeight(referenceWeight)
        else:
            inst.setWeight(otherWeight)


def _WeightSeveralClasses(fullData, dataset):
    """Weight instances by the normalised reciprocal of their class size.

    Class sizes are counted on fullData (the data before any class removal),
    the weights are applied to the instances of dataset.
    """
    classAttr = fullData.classAttribute()
    classCounts = fullData.attributeStats(fullData.classIndex()).nominalCounts
    rawWeights = {}
    for classIdx, count in enumerate(classCounts):
        # a label without instances has no reciprocal and needs no weight
        if count > 0:
            rawWeights[classIdx] = 1.0 / count
    if not rawWeights:
        return
    # normalize weights so that they sum to one over the populated classes
    weightSum = sum(rawWeights.values())
    weightPerClass = dict((classIdx, w / weightSum)
                          for classIdx, w in rawWeights.items())
    for idx in range(dataset.numInstances()):
        inst = dataset.instance(idx)
        # value() yields the label index as a floating point number
        inst.setWeight(weightPerClass[int(inst.value(classAttr))])

	

# This function calls the publicly available pyxel.py command to write one
# cell of the csv file.
def WriteCsvFile(CsvFilename,ID,Header,Value):
    """Write one value into the csv table by running the pyxel.py tool.

    The tool is started as
        pyxel.py -d CsvFilename ID Header Value -o <directory of CsvFilename>
    and must be reachable through the PATH. The arguments are passed as a
    list, without a shell, so quotes, '$' or backticks inside a value (for
    example in a classifier description) reach pyxel.py unchanged.

    Parameters:
        CsvFilename -- path of the csv file, passed after -d.
        ID, Header, Value -- strings passed to pyxel.py in this order;
            presumably row id, column header and cell text (confirm against
            the pyxel.py documentation).

    Returns:
        None. A failure to start pyxel.py or a non-zero exit status is
        reported on standard output; no exception is raised for it.
    """
    # local import: only this function needs it
    import subprocess

    outPath = os.path.dirname(CsvFilename)
    argv = ["pyxel.py", "-d", CsvFilename, ID, Header, Value, "-o", outPath]
    # csvCommand = "/sbiasfw/lab/testing/bin/pyxel.py "
    shown = 'pyxel.py -d "%s"  "%s"  "%s"  "%s" -o "%s"' % tuple(argv[2:6] + [outPath])
    print("Saving to .csv file: " + shown)
    try:
        status = subprocess.call(argv)
    except OSError:
        # e.g. pyxel.py is not installed; keep going like os.system did
        err = sys.exc_info()[1]
        print("Could not run pyxel.py: %s" % err)
        return
    if status != 0:
        print("pyxel.py exited with status %s" % status)
	

# This function takes six arguments and stores the outcome of one classifier
# search as three cells of the csv table.
def StoreInCSVTable(CsvFilename,classifierName,caseName,accValue,Description,outParama):
    """Write accuracy, description and parameters of one classifier.

    Three cells are written through WriteCsvFile, all with classifierName as
    the ID:
      - header caseName                                   -> str(accValue)
      - header 'Description of the classifier (<case>)'   -> Description
      - header 'Parameters (<case>)'                      -> str(outParama)
    """
    def cells():
        # produced lazily so every cell is built right before it is written
        yield caseName, str(accValue)
        yield 'Description of the classifier (' + caseName + ')', Description
        yield 'Parameters (' + caseName + ')', str(outParama)

    for columnHeader, cellText in cells():
        WriteCsvFile(CsvFilename, classifierName, columnHeader, cellText)


# This function reads the cases text file (currently disabled)
#cases = {}
#def     readCases(FN):
#        fid = open(FN,'r')
#        lines = fid.readlines()
#        for l in lines:
#            tmp = l.split('\t')
#            cases.update({int(float(tmp[0])):tmp[1][:-1]})   # to avoid \n
#        fid.close()



def main():
    """Parse the command line, load the ARFF data and run the parameter searches.

    For every requested classifier the matching *_ParamFinder function is
    called on the preprocessed data and its result is stored in the csv file
    through StoreInCSVTable. The options -a, -c and -n are mandatory.

    Returns:
        0 on success, 1 when a mandatory option is missing or an option value
        is invalid, 2 when the command line cannot be parsed.
    """
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "ha:c:iwr:n:l:",
            ["help", "arffFile=", "csvFile=", "idFlag", "weightFlag",
             "rmClass=", "nameClass=", "listOfClassifiers="])
    except getopt.GetoptError:
        # sys.exc_info() works on old and new interpreters alike
        err = sys.exc_info()[1]
        usage()
        print(err)
        return 2

    knownClassifiers = ['Logistic','Bagging Logistic','SMO','Bagging SMO','Simple Logistic','Bayesian','Random Forest']
    arffFile = None
    CsvFilename = None
    classifierName = None
    idFlag = False
    weightFlag = False
    rmClassFlag = False
    rmClass = 0
    listOfClassifier = list(knownClassifiers)
    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit(0)
        elif o in ("-a", "--arffFile"):
            arffFile = a
        elif o in ("-c", "--csvFile"):
            CsvFilename = a
        elif o in ("-i","--idFlag"):
            idFlag = True
        elif o in ("-w","--weightFlag"):
            weightFlag = True
        elif o in ("-r","--rmClass"):
            try:
                rmClass = int(float(a))
            except ValueError:
                usage()
                print("option %s needs a number, got: %s" % (o, a))
                return 1
            rmClassFlag = True
        elif o in ("-n","--nameClass"):
            classifierName = a
        elif o in ("-l","--listOfClassifiers"):
            # tolerate blanks around the commas, e.g. "SMO, Logistic"
            listOfClassifier = [name.strip() for name in a.split(',')]
        else:
            raise AssertionError("unhandled option")

    # all three mandatory options must be present, whatever else was given
    if arffFile is None or CsvFilename is None or classifierName is None:
        usage()
        return 1

    for name in listOfClassifier:
        if name not in knownClassifiers:
            print("Ignoring unknown classifier name: %s" % name)

    # load data file
    print("Loading data...")
    print("-------------- Input arffFile: %s" % arffFile)
    print("-------------- Output CsvFile: %s" % CsvFilename)
    # make sure that csv file does not exist there and you are creating it for the first time
    if os.path.exists(CsvFilename):
        os.remove(CsvFilename)
    reader = FileReader(arffFile)
    try:
        data = Instances(reader)
    finally:
        reader.close()

    # remove one of the classes and weight instances properly to compensate
    # for the imbalanced number of instances
    options = {'idFlag':idFlag, 'weightFlag': weightFlag, 'rmClassFlag': rmClassFlag, 'rmClass': rmClass}
    newData = PreprocessData(data,options)

    # Iterate over schemes to find optimal sets of parameters for each classifier
    smoBase = None
    if 'SMO' in listOfClassifier:
        # ----- SMO
        OptSMOIsRBF, OptSMO, OptSMOp1, OptSMOp2, OptSMOAcc, Description = SMO_ParamFinder(newData)
        smoBase = (OptSMOIsRBF, OptSMOp1, OptSMOp2)
        StoreInCSVTable(CsvFilename,'SMO',classifierName,OptSMOAcc,Description,smoBase)
    if 'Bagging SMO' in listOfClassifier:
        # ----- Bagging SMO
        if smoBase is None:
            # BaggingSMO_ParamFinder takes the best plain SMO setting as
            # input; find it here, but do not store it because plain SMO
            # was not requested.
            OptSMOIsRBF, OptSMO, OptSMOp1, OptSMOp2, OptSMOAcc, Description = SMO_ParamFinder(newData)
            smoBase = (OptSMOIsRBF, OptSMOp1, OptSMOp2)
        IsOptBagOnOptSMO, OptBagSMO, OptBagSMOp1, OptBagSMOp2, OptBagSMOAcc, Description = \
            BaggingSMO_ParamFinder(newData, smoBase[0], smoBase[1], smoBase[2])
        outParam = (IsOptBagOnOptSMO, OptBagSMOp1, OptBagSMOp2)
        StoreInCSVTable(CsvFilename,'Bagging SMO',classifierName,OptBagSMOAcc,Description,outParam)

    logBase = None
    if 'Logistic' in listOfClassifier:
        # ----- Logistic
        OptLog, OptLogp1, OptLogp2, OptLogAcc, Description = Logistic_ParamFinder(newData)
        logBase = (OptLogp1, OptLogp2)
        StoreInCSVTable(CsvFilename,'Logistic',classifierName,OptLogAcc,Description,logBase)
    if 'Bagging Logistic' in listOfClassifier:
        # ----- Bagging Logistic
        if logBase is None:
            # same as for Bagging SMO: the plain Logistic result is needed
            # as input but is not stored when it was not requested
            OptLog, OptLogp1, OptLogp2, OptLogAcc, Description = Logistic_ParamFinder(newData)
            logBase = (OptLogp1, OptLogp2)
        IsOptBagOnOptLog, OptBagLog, OptBagLogp1, OptBagLogp2, OptBagLogAcc, Description = \
            BaggingLogistic_ParamFinder(newData, logBase[0], logBase[1])
        outParam = (IsOptBagOnOptLog, OptBagLogp1, OptBagLogp2)
        StoreInCSVTable(CsvFilename,'Bagging Logistic',classifierName,OptBagLogAcc,Description,outParam)

    if 'Simple Logistic' in listOfClassifier:
        # ----- Simple Logistic
        OptSimpLog, OptSimpLogp1, OptSimpLogp2, OptSimpLogAcc, Description = \
            SimpleLogistic_ParamFinder(newData)
        outParam = (OptSimpLogp1, OptSimpLogp2)
        StoreInCSVTable(CsvFilename,'Simple Logistic',classifierName,OptSimpLogAcc,Description,outParam)
    # ----- Boosted Simple Logistic (disabled)
    #IsOptBoostOnOptSimpLog, OptBoostSimpLog, OptBoostSimpLogp1, OptBoostSimpLogp2,  OptBoostSimpLogAcc, Description \
    #		 = AdaBoostedSimpleLogistic_ParamFinder(newData, OptSimpLogp1, OptSimpLogp2)
    #outParam = (IsOptBoostOnOptSimpLog, OptBoostSimpLogp1, OptBoostSimpLogp2)
    #StoreInCSVTable(CsvFilename,'Boosted Simple Logistic',classifierName,OptBoostSimpLogAcc,Description,outParam)
    if 'Bayesian' in listOfClassifier:
        # ----- Find the best configuration for Bayesian classifier
        IsOptMultinomialBayes, IsOptNaiveKernelDensity, OptBayesAcc, Description = Bayes_ParamFinder(newData)
        outParam = (IsOptMultinomialBayes, IsOptNaiveKernelDensity)
        StoreInCSVTable(CsvFilename,'Bayesian',classifierName,OptBayesAcc,Description,outParam)
    if 'Random Forest' in listOfClassifier:
        # ----- Find the best parameter for Random-Forest classifier
        OptRndFrst, OptRndFrstp1, OptRndFrstp2, OptRndFrstAcc, Description = RandomForest_ParamFinder(newData)
        outParam = (OptRndFrstp1, OptRndFrstp2)
        StoreInCSVTable(CsvFilename,'Random Forest',classifierName,OptRndFrstAcc,Description,outParam)
    return 0



# Run main() when executed as a script and hand its return code to the
# caller as the process exit status (a return value of None counts as success).
if __name__ == '__main__':
    sys.exit(main())
