package bl.coe;

import java.io.IOException;

import bl.coe.BigSparseMath.BigDenseArrayVector;
import bl.coe.BigSparseMath.BigSparseMatrix;
import bl.coe.BigSparseMath.BigSparseMatrixTools;
import bl.coe.BigSparseMath.LabelPairs;
import bl.coe.BigSparseMath.LabeledBigSparseMatrix;

/**
 * The Class SimilarityWeightedKNNByRows.
 * 
 * <p> <b><i>Program Usage:</i> bl.coe.SimilarityWeightedKNNByRows K[int] [matrix.bsm] [columns/label pairs.txt] [destination.txt] [optional: # threads]</b>
 * 
 * <p> Perform similarity weighted k-nearest neighbors (KNN) on the rows of a matrix (matrix.bsm) using the subset of 
 * columns with labels indicated in the LabelPairs text file. 
 * 
 * <p> The results are written to a text file with 1 line per row. First with the # of neighbors used and followed by all possible nearest (with count = max sum similarity... unlikely to be more than 1) 
 * neighbors written out for each row. The algorithm is as follows
 * <ol>
 *  <li>	Given a NxN matrix with labels on M columns. 
 *  <li>	Extract a NxM sub-matrix
 *  <li>	For each row, find the K nearest neighbors with values >0. 
 *  <li>	For each unique label in this set, sum the similarity scores of the k neighbors that have that label.
 *  <li>	Choose the label with the highest total score. With floating point similarity scores, there is almost no chance that there will be a tie.
 *  <li>	Caveat: If there are more than K elements with the same score as the Kth score, then all are used  
 * 	</ol> 
 * 
 * <p> The matrix is saved in a big-endian binary format. See the BigSparseMatrix class below.
 * 
 * @see bl.coe.BigSparseMath.BigMathPreferences
 * @see bl.coe.BigSparseMath.BigSparseMatrix
 * @see bl.coe.BigSparseMath.LabelPairs
 * @see bl.coe.BigSparseMath.BigSparseMatrixTools
 * 
 * @author Bennett Landman, bennett.landman@vanderbilt.edu
 */
public class SimilarityWeightedKNNByRows {
	
	/**
	 * Command-line entry point.
	 * 
	 * <p> Expected arguments, in order:
	 * <ol>
	 *  <li> {@code args[0]}: K, the number of neighbors (integer, at least 1)
	 *  <li> {@code args[1]}: path of the matrix file handed to {@link BigSparseMatrix}
	 *  <li> {@code args[2]}: path of the column/label pairs file handed to {@link LabelPairs}
	 *  <li> {@code args[3]}: path of the text file the result vector is written to
	 *  <li> {@code args[4]}: optional number of threads (integer, at least 1; defaults to 1)
	 * </ol>
	 * 
	 * <p> The usage line is always printed. If the argument count is not 4 or 5 the method
	 * returns right after printing it. Any invalid numeric argument or I/O failure is
	 * reported on standard output and the method returns without throwing.
	 * 
	 * @param args the command-line arguments described above
	 */
	public static void main(String []args){
		System.out.println("bl.coe.SimilarityWeightedKNNByRows K<int> <matrix.bsm> <columns/label pairs.txt> <destination.txt> [optional: # threads]");
		if(args.length!=4 && args.length!=5) {
			return;
		}

		// Validate the numeric arguments before the slow matrix load so a typo fails fast
		// with a readable message instead of an uncaught NumberFormatException.
		int k;
		int nThreads = 1;
		try {
			k = parsePositiveInt(args[0], "K");
			if(args.length==5) {
				nThreads = parsePositiveInt(args[4], "# threads");
			}
		} catch (IllegalArgumentException e) {
			System.out.println("Failed: "+e.getMessage());
			return;
		}
		System.out.println("Performing Similarity Weighted "+k+"-NN with "+nThreads+" parallel thread(s)");

		// Load the full matrix and time the load.
		BigSparseMatrix mat;
		long tic=System.currentTimeMillis();
		try {
			System.out.println("Loading matrix (this might take a while)...");
			mat= new BigSparseMatrix(args[1]);
		} catch (IOException e) {
			System.out.println("Failed: "+e.getMessage());
			e.printStackTrace();
			return;
		}
		long toc=System.currentTimeMillis()-tic;
		System.out.println("Loaded:");		
		System.out.println(args[1]);
		BigSparseMatrixTools.printStat(mat);
		System.out.println("Loaded in:         "+toc/1000.f+" s");

		// Load the column index / label pairs.
		System.out.println("Loading labels "+args[2]);
		LabelPairs lp;
		try {
			lp = new LabelPairs(args[2]);
		} catch (IOException e) {
			System.out.println("Failed: "+e.getMessage());
			e.printStackTrace();
			return;
		} 
		BigSparseMatrixTools.printStat(lp);
		
		// Keep only the labeled columns and attach their labels to the sub-matrix.
		System.out.println("Creating submatrix by columns...");
		LabeledBigSparseMatrix subMat;
		try {
			subMat = new LabeledBigSparseMatrix(mat.extractSubMatrixByCols(lp.getLabeledColumnIndexes()));
		} catch (IOException e) {
			System.out.println("Failed: "+e.getMessage());
			e.printStackTrace();
			return;
		}
		subMat.setAllColLabels(lp.getColumnLabelValues());
		
		System.out.println("Running KNN...");
		tic = System.currentTimeMillis();
		// NOTE(review): the meaning of the trailing 'true' flag is defined in BigSparseMatrixTools
		// and is not visible from this file; it is passed through unchanged.
		BigDenseArrayVector result = BigSparseMatrixTools.runThreadedSimilarityWeightedKNNLabeledBigSparseMatrixRows(nThreads, k, subMat,true);
		System.out.println("Completed in "+((System.currentTimeMillis()-tic)/1000.f)+" s");
		
		System.out.println("Writing results to: "+args[3]);
		try {
			result.writeTextFile(args[3]);
		} catch (IOException e) {
			System.out.println("Failed: "+e.getMessage());
			e.printStackTrace();
		}
	}

	/**
	 * Parses a command-line argument as an integer that must be at least 1.
	 * 
	 * @param text the raw argument text
	 * @param name the argument name used in the error message
	 * @return the parsed value, always {@code >= 1}
	 * @throws IllegalArgumentException if {@code text} is not a valid integer or is less than 1
	 */
	private static int parsePositiveInt(String text, String name) {
		int value;
		try {
			value = Integer.parseInt(text);
		} catch (NumberFormatException e) {
			throw new IllegalArgumentException(name+" must be an integer, got \""+text+"\"", e);
		}
		if(value<1) {
			throw new IllegalArgumentException(name+" must be at least 1, got "+value);
		}
		return value;
	}
}