package bl.coe;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;

import bl.coe.BigSparseMath.BigDenseArrayVector;
import bl.coe.BigSparseMath.BigSparseMatrix;
import bl.coe.BigSparseMath.BigSparseMatrixTools;
import bl.coe.BigSparseMath.LabelPairs;
import bl.coe.BigSparseMath.LabeledBigSparseMatrix;

/**
 * Command-line driver that runs cross-validated k-nearest neighbors (KNN) on the rows of a matrix.
 * <p> <b><i>Program Usage:</i> bl.coe.KNNByRowsCrossValidation K&lt;int&gt; &lt;matrix.bsm&gt; &lt;columns/label pairs.txt&gt; 
 * SubSetSize&lt;int&gt; nSubsets&lt;int&gt;(neg to indicate disjoint) &lt;destination file tag&gt; seedRandom&lt;long&gt; [optional: # threads]</b>
 * 
 * <p> Perform k-nearest neighbors (KNN) on the rows of a matrix (matrix.bsm) using the subset of 
 * columns with labels indicated in the LabelPairs text file. The procedure is repeated for nSubsets 
 * randomized subsets of size SubSetSize. For a subset size of Q: 
 * <ol>
 *  <li>	Disjoint subsets are created by randomizing the selection order of all labeled columns and using sequential blocks (in the randomized order) of Q labels. There are at most floor(N/Q) of these subsets per run.
 *  <li>	Overlapping subsets are created by randomizing the selection order of all labeled columns and using the first Q of the set. Randomization of the entire sequence is performed for each iteration. 
 * </ol>
 * 
 * <p> Two text files are written per subset, where NNNN is the zero-based subset index padded to 4 digits:
 * <ul>
 *  <li>	<code>&lt;destination file tag&gt;NNNN-labels.txt</code>: the label pairs that make up the subset.
 *  <li>	<code>&lt;destination file tag&gt;NNNN.txt</code>: the KNN results for the subset.
 * </ul>
 * 
 * <p> The results are written to a text file with 1 line per row: first the # of neighbors used, followed by all possible nearest 
 * (with count = mode) neighbors for that row. The algorithm is as follows
 * <ol>
 *  <li>	Given a NxN matrix with labels on M columns. 
 *  <li>	Extract a NxM sub-matrix
 *  <li>	For each row, find the K nearest neighbors with values &gt; 0. 
 *  <li>	Caveat: If there are less than K but at least 1 non-zero neighbor, use the &lt;K-NN (fall back to reduced k-NN using the highest valid k) and repeat
 *  <li>	Caveat: If there are more than K elements with the same score as the Kth score, then all are used
 *  <li>	Report all labels which correspond to valid modes. 
 *  <li>	Caveat: If there are no non-zero elements on the row, report label -1. 
 * 	</ol> 
 * 
 * <p> The matrix is saved in a big-endian binary format. See the BigSparseMatrix class below.
 * 
 * @see bl.coe.BigSparseMath.BigMathPreferences
 * @see bl.coe.BigSparseMath.BigSparseMatrix
 * @see bl.coe.BigSparseMath.LabelPairs
 * @see bl.coe.BigSparseMath.BigSparseMatrixTools
 * 
 * @author Bennett Landman, bennett.landman@vanderbilt.edu
 */
public class KNNByRowsCrossValidation {

	/**
	 * Program entry point. Always prints the usage line, then returns without doing any work
	 * unless exactly 7 or 8 arguments are supplied.
	 * 
	 * <p> Any I/O failure while loading the matrix, loading the labels, writing a subset's label
	 * file, or extracting a sub-matrix prints a stack trace and ends the run. A failure to write
	 * one subset's result file prints a stack trace and continues with the next subset.
	 * 
	 * @param args the arguments, in order: 
	 * <ol start="0">
	 *  <li>	K, the number of neighbors (int)
	 *  <li>	path of the matrix file
	 *  <li>	path of the column/label pairs text file
	 *  <li>	subset size (int)
	 *  <li>	number of subsets (int); a negative value requests disjoint subsets of that count
	 *  <li>	destination file tag (prefix of every output file name)
	 *  <li>	random seed (long)
	 *  <li>	optional: number of threads (int), defaults to 1
	 * </ol>
	 * @throws NumberFormatException if a numeric argument cannot be parsed
	 */
	public static void main(String []args){
		System.out.println("bl.coe.KNNByRowsCrossValidation K<int> <matrix.bsm> <columns/label pairs.txt> SubSetSize<int> nSubsets<int>(neg to indicate disjoint) <destination file tag> seedRandom<long> [optional: # threads]");
		// The seed (args[6]) is mandatory and the thread count (args[7]) is optional,
		// so only 7 or 8 arguments are valid.
		if(args.length!=7 && args.length!=8)
			return;
		int k = Integer.parseInt(args[0]);
		int nThreads =1;
		long randomSeed = Long.parseLong(args[6]);
		if(args.length==8)
			nThreads = Integer.parseInt(args[7]);
		int subsetSize = Integer.parseInt(args[3]);
		int nSubsets = Integer.parseInt(args[4]);
		// A negative subset count is a flag for disjoint subsets; the magnitude is the count.
		boolean disjoint = nSubsets<0;
		if(disjoint)
			nSubsets=-nSubsets;
		System.out.println("Performing cross-validation "+k+"-NN with "+nThreads+" parallel thread(s)");
		System.out.println("Subset size: "+subsetSize+" and attempting "+nSubsets+(disjoint?" disjoint ":" overlapping ")+"subsets");
		BigSparseMatrix mat =null;
		long tic=System.currentTimeMillis();
		try {
			System.out.println("Loading matrix (this might take a while)...");
			mat= new BigSparseMatrix(args[1]);
		} catch (IOException e) {
			System.out.println("Failed: "+e.getMessage());
			e.printStackTrace();
			return;
		}
		long toc=System.currentTimeMillis()-tic;
		System.out.println("Loaded:");		
		System.out.println(args[1]);
		BigSparseMatrixTools.printStat(mat);
		System.out.println("Loaded in:         "+toc/1000.f+" s");

		System.out.println("Loading labels "+args[2]);
		LabelPairs lp;
		try {
			lp = new LabelPairs(args[2]);
		} catch (IOException e1) {
			e1.printStackTrace();
			return;
		} 
		BigSparseMatrixTools.printStat(lp);

		System.out.println("Randomization Seed: "+randomSeed);
		LabelPairs []lpSubset;
		if(disjoint)
			lpSubset = lp.createRandomizedDisjointSubsets(subsetSize, nSubsets,randomSeed);
		else 
			lpSubset = lp.createRandomizedOverlappingSubsets(subsetSize, nSubsets,randomSeed);
		System.out.println("Identified "+lpSubset.length+" subsets of size "+subsetSize+" in "+lp.getNumberOfLabelPairs()+" label pairs");

		long firstTic = System.currentTimeMillis();
		for(int jXvalRun=0;jXvalRun<lpSubset.length;jXvalRun++) {
			// Output files for this run share the prefix <tag><4-digit zero-padded run index>.
			String filename = args[5]+String.format("%04d", jXvalRun);
			System.out.println("Current label subset "+jXvalRun+" of "+lpSubset.length);			
			System.out.println("\tCreating submatrix by columns...");
			LabeledBigSparseMatrix subMat;
			try {
				FileOutputStream labelOut = new FileOutputStream(new File(filename+"-labels.txt"));
				try {
					lpSubset[jXvalRun].writeToText(labelOut).close();
				} finally {
					// Release the file handle even when writing the labels fails.
					labelOut.close();
				}
			} catch (IOException e1) {
				// Also covers FileNotFoundException from opening the labels file.
				e1.printStackTrace();
				return;
			}
			try {
				subMat = new LabeledBigSparseMatrix(mat.extractSubMatrixByCols(lpSubset[jXvalRun].getLabeledColumnIndexes()));
			} catch (IOException e) {		
				e.printStackTrace();
				return;
			}
			subMat.setAllColLabels(lpSubset[jXvalRun].getColumnLabelValues());

			System.out.println("\tRunning KNN...");
			tic = System.currentTimeMillis();
			BigDenseArrayVector result = BigSparseMatrixTools.runThreadedKNNLabeledBigSparseMatrixRows(nThreads, k, subMat);
			System.out.println("\tCompleted in "+((System.currentTimeMillis()-tic)/1000.f)+" s");


			System.out.println("\tWriting results to: "+filename);
			try {				
				result.writeTextFile(filename+".txt");
			} catch (IOException e) {
				// Report the failure but keep going so the remaining subsets are still processed.
				e.printStackTrace();
			}
		}
		System.out.println("Completed in "+((System.currentTimeMillis()-firstTic)/1000.f)+" s");
	}
}