package bl.coe.BigSparseMath;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.Vector;

/**
 * The Class BigSparseMatrix.
 * 
 * <p> Provides an efficient representation for sparse matrices (density<50%).
 * 
 * <p> BigSparseMatrix objects may be read from disk as text files (either in UMD's format or in full
 * text matrix format).
 * 
 * <p> The preferred method of storage and access for BigSparseMatrix objects is through the custom "BSM" 
 * (Big Sparse Matrix) format. These files typically are identified with a ".bsm" extension, but the 
 * file name is not relevant to the decoding. 
 * 
 * <p><h2>BSM File Format</h2>
 * <h3>Header</h3>
 * <ul>
 * <li> 10 character file identifier : "BSM(v1.0)0" (the tenth character is the ASCII digit '0' as written by the implementation, not a NUL byte)
 * <li> 37 character unique id string : "-54318644-8e17-41ee-86b4-556fac2ea437"
 * <li> 2 bytes (16 bit) short integer : 1 (used to check endianness)
 * <li> 4 bytes (32 bit) integer : # of Rows (N) 
 * <li> 4 bytes (32 bit) integer : # of Columns (M)
 * </ul> 
 * <h3> Row Density Data </h3> 
 * <ul>
 * <li> Nx4 bytes (32 bit) integers : # of recorded elements on each row (P(i)). Sequentially presented from i=0...(N-1)
 * </ul>
 * <h3> Data Values </h3> 
 * <ul> 
 * <li> For each row (in order i=0...N-1): 
 * <ul> 
 * <li> P(i) x 8 bytes (32 bit integer followed by 32 bit floating point) : column position for element on row i (indexed j=0...(M-1))
 * followed by the floating point value for location (i,j)
 * </ul>
 * </ul>
 * 
 * 
 *
 * @author Bennett Landman, bennett.landman@vanderbilt.edu
 */
public class BigSparseMatrix extends BigMatrix {

	/**
	 * The file signature written at the start of every BSM stream.
	 *
	 * <p>The original expression {@code "BSM(v1.0)"+0+"-5431..."} concatenates the decimal
	 * digit '0', so the tenth character is ASCII '0' (0x30), not a NUL byte. The literal is
	 * spelled out here to make that explicit; the value is unchanged so that previously
	 * written files remain readable.
	 */
	private final static String UUID = "BSM(v1.0)0-54318644-8e17-41ee-86b4-556fac2ea437";

	/**
	 * Token separator used when parsing UMD text files.
	 *
	 * <p>NOTE(review): letters are separators, so a value written with an exponent
	 * (e.g. "1e-5") is split into two tokens ("1" and "-5"). Confirm that UMD files
	 * never contain exponent notation.
	 */
	private static final String UMD_DELIMITER_REGEX = "[ \t,;:a-zA-Z]+";

	/** The rows of the matrix; a null entry represents a row with no stored elements. */
	protected BigSparseVector rows[];

	/** The number of rows in the matrix. */
	protected int Nrows;

	/** The number of columns in the matrix. */
	protected int Mcols;

	/** The sticky row index: "original" row index, allows remapping of matrix rows for sub-matrix extraction. */
	protected int StickyRowIndex[];

	/** The sticky col index: "original" col index, allows remapping of matrix cols for sub-matrix extraction. */
	protected int StickyColIndex[];

	/**
	 * Instantiates a new, empty big sparse matrix with identity sticky index mappings.
	 *
	 * @param n the number of rows
	 * @param m the number of columns
	 */
	public BigSparseMatrix(int n, int m) {
		Nrows= n;
		Mcols = m;
		rows = new BigSparseVector[n];
		StickyRowIndex = new int[n];
		remapStickyRowIndexes();
		StickyColIndex = new int[m];
		remapStickyColIndexes();
	}

	/**
	 * Instantiates a new big sparse matrix based on a BSM file.
	 *
	 * @param BSMfile the filename
	 *
	 * @throws IOException Signals that an I/O exception has occurred.
	 */
	public BigSparseMatrix(String BSMfile) throws IOException {
		this(new File(BSMfile));
	}

	/**
	 * Instantiates a new big sparse matrix based on a BSM file.
	 *
	 * @param BSMfile the File object for the BSM file
	 *
	 * @throws IOException Signals that an I/O exception has occurred.
	 */
	public BigSparseMatrix(File BSMfile) throws IOException {
		// The stream constructor always closes the stream it is given.
		this(new FileInputStream(BSMfile));
	}

	/**
	 * Instantiates a new big sparse matrix as a shallow copy of an existing one.
	 * Neither the row data nor the sticky index arrays are copied: the new object
	 * shares all of them with {@code bsm}.
	 *
	 * @param bsm the matrix to clone
	 *
	 * @throws IOException never thrown by this implementation; retained for source compatibility.
	 */
	public BigSparseMatrix(BigSparseMatrix bsm) throws IOException {
		Mcols=bsm.Mcols;
		Nrows=bsm.Nrows;
		rows =bsm.rows;
		StickyColIndex = bsm.StickyColIndex;
		StickyRowIndex = bsm.StickyRowIndex;
	}

	/**
	 * Instantiates a new big sparse matrix by reading BSM data from an input stream.
	 * The stream is always closed before this constructor returns or throws.
	 *
	 * @param BSMfile the input stream to use
	 *
	 * @throws IOException if the stream cannot be read, ends early, or does not
	 *         contain a valid BSM header.
	 */
	public BigSparseMatrix(InputStream BSMfile) throws IOException {
		long tic=System.currentTimeMillis();
		DataInputStream inFp = new DataInputStream(new BufferedInputStream(BSMfile));
		try {
			byte []uuid = signatureBytes();
			byte []bytes = new byte[uuid.length];
			inFp.readFully(bytes);
			if(!Arrays.equals(uuid, bytes)) {
				throw new IOException("Not a valid BSM file. UUID does not match.");
			}

			if(inFp.readShort()!=1) {
				throw new IOException("Not a valid BSM file. Likely endianess mismatch.");
			}

			Nrows=inFp.readInt();
			Mcols = inFp.readInt();
			if(BigMathPreferences.verbose) {
				System.out.println("BSM Load: "+Nrows+"x"+Mcols);System.out.flush();
			}
			// Report a corrupt header as an I/O problem instead of a NegativeArraySizeException.
			if(Nrows<0 || Mcols<0) {
				throw new IOException("Not a valid BSM file. Negative matrix dimensions: "+Nrows+"x"+Mcols);
			}

			// Row density table: number of recorded elements per row.
			int []rowSize = new int[Nrows];
			for(int i=0;i<Nrows;i++) {
				rowSize[i]=inFp.readInt();
			}

			// Rows with no recorded elements stay null.
			rows = new BigSparseVector[Nrows];
			for(int i=0;i<Nrows;i++) {
				if(rowSize[i]>0)
					rows[i] = BigSparseVector.readData(rowSize[i],inFp);
			}

			StickyRowIndex = new int[Nrows];
			remapStickyRowIndexes();
			StickyColIndex = new int[Mcols];
			remapStickyColIndexes();
		} finally {
			inFp.close();
		}
		if(BigMathPreferences.verbose) {
			long toc=System.currentTimeMillis()-tic;
			System.out.println("Loaded in:         "+toc/1000.f+" s");
		}
	}

	/**
	 * Encodes the file signature as bytes. US-ASCII is requested explicitly so the
	 * bytes on disk do not depend on the platform default charset.
	 *
	 * @return the signature bytes
	 *
	 * @throws IOException if the US-ASCII charset is unavailable
	 */
	private static byte[] signatureBytes() throws IOException {
		return UUID.getBytes("US-ASCII");
	}

	/**
	 * Writes this matrix in BSM format: signature, endianness marker, dimensions,
	 * per-row element counts, then the data of every non-null row.
	 * The stream is always closed before this method returns or throws.
	 *
	 * @param BSMFile the output stream
	 *
	 * @throws IOException Signals that an I/O exception has occurred.
	 */
	public void write(OutputStream BSMFile) throws IOException {
		DataOutputStream outFp = new DataOutputStream(new BufferedOutputStream(BSMFile));
		try {
			outFp.write(signatureBytes());
			outFp.writeShort(1);
			outFp.writeInt(Nrows);
			outFp.writeInt(Mcols);
			for(int i=0;i<Nrows;i++) {
				if(rows[i]!=null)
					outFp.writeInt(rows[i].countNonEmpty());
				else
					outFp.writeInt(0);
			}
			for(int i=0;i<Nrows;i++) {
				if(rows[i]!=null)
					rows[i].writeData(outFp);
			}
		} finally {
			outFp.close();
		}
	}

	/**
	 * Remap row indexes.
	 * Reset the sticky row index to 0...N-1
	 */
	public void remapStickyRowIndexes() {
		for(int i=0;i<Nrows;i++)
			StickyRowIndex[i]=i;
	}

	/**
	 * Remap col indexes.
	 * Reset the sticky col index to 0...M-1
	 */
	public void remapStickyColIndexes() {
		for(int i=0;i<Mcols;i++)
			StickyColIndex[i]=i;
	}

	/**
	 * Gets the value of the matrix at r,c.
	 *
	 * @param r the row
	 * @param c the col
	 *
	 * @return the float value at the location; 0 when the row holds no data
	 */
	public float get(int r, int c) {
		BigSparseVector row = rows[r];
		if(row==null)
			return 0.f;
		else
			return row.get(c);
	}

	/**
	 * Sets the value of the matrix at location r,c. A row that holds no data yet is
	 * created with this single element.
	 *
	 * @param r the row
	 * @param c the col
	 * @param v the value
	 */
	public void set(int r, int c, float v) {
		BigSparseVector row = rows[r];
		if(row==null) {
			int cc[] = new int[1];
			cc[0]=c;
			float[] vv = new float[1];
			vv[0]=v;
			row = new BigSparseVector(cc,vv);
			rows[r]=row;
		} else {
			row.set(c,v);
		}
	}

	/**
	 * Extract sub matrix by rows. Each vector entry is truncated to an int row index.
	 *
	 * @param vec the vector of row indexes
	 *
	 * @return the big sparse matrix
	 */
	public BigSparseMatrix extractSubMatrixByRows(BigVector vec) {
		int []rowSubset = new int[vec.getLength()];
		for(int i=0;i<vec.getLength();i++)
			rowSubset[i]=(int)vec.get(i);
		return extractSubMatrixByRows(rowSubset);
	}

	/**
	 * Extract sub matrix by rows. The selected rows are cloned into the new matrix.
	 *
	 * @param rowSubset the row subset
	 *
	 * @return the big sparse matrix
	 */
	public BigSparseMatrix extractSubMatrixByRows(int []rowSubset) {
		BigSparseMatrix subMat = new BigSparseMatrix(rowSubset.length,Mcols);
		subMat.copySubMatrixByRows(this,rowSubset);
		return subMat;
	}

	/**
	 * Copy sub matrix by rows: row r of this matrix becomes a clone of row
	 * {@code rowSubset[r]} of {@code src}, and the sticky row mapping is updated.
	 *
	 * @param src the src
	 * @param rowSubset the row subset
	 */
	protected void copySubMatrixByRows(BigSparseMatrix src,
			int[] rowSubset) {
		for(int r=0;r<rowSubset.length;r++) {
			if(src.rows[rowSubset[r]]!=null)
				setRow(r,src.rows[rowSubset[r]].clone());
		}
		setStickyRowMapping(rowSubset,src.StickyRowIndex);
	}

	/**
	 * Extract sub matrix by cols. Each vector entry is truncated to an int column index.
	 *
	 * @param vec the vec
	 *
	 * @return the big sparse matrix
	 */
	public BigSparseMatrix extractSubMatrixByCols(BigVector vec) {
		int []colSubset = new int[vec.getLength()];
		for(int i=0;i<vec.getLength();i++)
			colSubset[i]=(int)vec.get(i);
		return extractSubMatrixByCols(colSubset);
	}

	/**
	 * Extract sub matrix by cols.
	 *
	 * @param colSubset the col subset
	 *
	 * @return the big sparse matrix
	 */
	public BigSparseMatrix extractSubMatrixByCols(int []colSubset) {
		BigSparseMatrix subMat = new BigSparseMatrix(Nrows,colSubset.length);
		subMat.copySubMatrixByCols(this,colSubset);
		return subMat;
	}

	/**
	 * Copy sub matrix by cols: every non-null row of {@code src} is reduced to the
	 * selected columns, and the sticky col mapping is updated.
	 *
	 * @param src the src
	 * @param colSubset the col subset
	 */
	protected void copySubMatrixByCols(BigSparseMatrix src,
			int[] colSubset) {
		for(int r=0;r<Nrows;r++) {
			if(src.rows[r]!=null)
				setRow(r,src.rows[r].extractSubVector(colSubset));
		}
		setStickyColMapping(colSubset,src.StickyColIndex);
	}

	/**
	 * Copy sub matrix: row r of this matrix becomes row {@code rowSubset[r]} of
	 * {@code src} reduced to the selected columns; both sticky mappings are updated.
	 *
	 * @param src the src
	 * @param rowSubset the row subset
	 * @param colSubset the col subset
	 */
	protected void copySubMatrix(BigSparseMatrix src,
			int[] rowSubset, int[] colSubset) {
		for(int r=0;r<rowSubset.length;r++) {
			// The null check must look at the SOURCE row (the one dereferenced below),
			// not at this matrix's own rows.
			BigSparseVector srcRow = src.rows[rowSubset[r]];
			if(srcRow!=null)
				setRow(r,srcRow.extractSubVector(colSubset));
		}
		setStickyRowMapping(rowSubset,src.StickyRowIndex);
		setStickyColMapping(colSubset,src.StickyColIndex);
	}

	/**
	 * Sets the row. The vector is stored by reference.
	 *
	 * @param r the r
	 * @param row the row (may be null for an empty row)
	 */
	public void setRow(int r, BigSparseVector row) {
		rows[r]= row;
	}

	/**
	 * Gets the row (the stored reference, not a copy).
	 *
	 * @param r the r
	 *
	 * @return the row, or null when the row holds no data
	 */
	protected BigSparseVector getRow(int r) {
		return rows[r];
	}

	/**
	 * Gets the row copy.
	 *
	 * @param r the r
	 *
	 * @return a clone of the row, or null when the row holds no data
	 */
	public BigSparseVector getRowCopy(int r) {
		BigSparseVector row = rows[r];
		if(row==null)
			return null;
		return row.clone();
	}

	/**
	 * Gets the col copy: a new sparse vector holding every non-zero value of
	 * column c, indexed by row.
	 *
	 * @param c the c
	 *
	 * @return the col copy
	 */
	public BigSparseVector getColCopy(int c) {
		// Growable primitive buffers avoid boxing every element.
		int capacity = Math.max(1, Math.min(4096, Nrows));
		int []offsets = new int[capacity];
		float []vals = new float[capacity];
		int count=0;
		for(int i=0;i<Nrows;i++){
			float val = get(i,c);
			if(val!=0.f) {
				if(count==offsets.length) {
					int []biggerOffsets = new int[2*offsets.length];
					float []biggerVals = new float[2*vals.length];
					System.arraycopy(offsets,0,biggerOffsets,0,count);
					System.arraycopy(vals,0,biggerVals,0,count);
					offsets = biggerOffsets;
					vals = biggerVals;
				}
				offsets[count]=i;
				vals[count]=val;
				count++;
			}
		}
		// Trim to the exact number of non-zero elements found.
		int []offset_i = new int[count];
		float []vals_f = new float[count];
		System.arraycopy(offsets,0,offset_i,0,count);
		System.arraycopy(vals,0,vals_f,0,count);
		return new BigSparseVector(offset_i,vals_f);
	}

	/**
	 * Sets the sticky col mapping so that each column of this matrix records the
	 * sticky index of the source column it was taken from. {@code colSubset} must
	 * have at least as many entries as this matrix has columns.
	 *
	 * @param colSubset the col subset
	 * @param oldStickyColIndex the old sticky col index
	 */
	protected void setStickyColMapping(int[] colSubset, int[] oldStickyColIndex) {
		for(int i=0;i<StickyColIndex.length;i++)
			StickyColIndex[i] = oldStickyColIndex[colSubset[i]];
	}

	/**
	 * Sets the sticky row mapping so that each row of this matrix records the
	 * sticky index of the source row it was taken from. {@code rowSubset} must
	 * have at least as many entries as this matrix has rows.
	 *
	 * @param rowSubset the row subset
	 * @param oldStickyRowIndex the old sticky row index
	 */
	protected void setStickyRowMapping(int[] rowSubset, int[] oldStickyRowIndex) {
		for(int i=0;i<StickyRowIndex.length;i++)
			StickyRowIndex[i] = oldStickyRowIndex[rowSubset[i]];
	}

	/**
	 * Parses the full matrix text file into a new N x M matrix.
	 *
	 * @param file the file
	 * @param N the number of rows
	 * @param M the number of columns
	 *
	 * @return the big sparse matrix
	 *
	 * @throws IOException Signals that an I/O exception has occurred.
	 */
	public static BigSparseMatrix parseFullMatrixTextFile(File file, int N, int M) throws IOException {
		BigSparseMatrix mat = new BigSparseMatrix(N,M);
		mat.loadFullMatrixTextFile(file);
		return mat;
	}

	/**
	 * Prints the current row, the elapsed time and an estimate of the remaining
	 * load time (linear extrapolation over the rows of this matrix).
	 *
	 * @param r the number of rows processed so far (must be non-zero)
	 * @param tic the start time in milliseconds
	 */
	private void printLoadProgress(int r, long tic) {
		float tdiff =(System.currentTimeMillis()-tic)/1000.f;
		float trem = tdiff/r*(Nrows-r);
		int hh = (int)Math.floor(trem/3600.f);
		int mm = (int)Math.floor(trem/60.f-hh*60);
		float ss = (float)Math.floor(trem-hh*3600-mm*60);
		System.out.println("Loading row: "+r+" \tElapsed Time:"+(tdiff)+" s\tEstimate time remaining:"+hh+":"+mm+":"+ss);
		System.out.flush();
	}

	/**
	 * Load full matrix text file: one text line per matrix row, each holding
	 * exactly {@code Mcols} whitespace separated values. Only non-zero values are
	 * stored; an all-zero line leaves the row null.
	 *
	 * @param file the file
	 *
	 * @throws IOException if the file cannot be read, has fewer lines than the
	 *         matrix has rows, or a line has the wrong number of entries.
	 */
	public void loadFullMatrixTextFile(File file) throws IOException {
		BufferedReader inFp = new BufferedReader(new FileReader(file));
		try {
			float []rowFloat = new float[Mcols];
			long tic = System.currentTimeMillis();

			for(int r=0;r<Nrows;r++) {
				if(r%1000==5 && BigMathPreferences.verbose) {
					printLoadProgress(r,tic);
				}

				String line = inFp.readLine();
				if(line==null) {
					throw new IOException("Unexpected end of file: expected "+Nrows+" rows but found only "+r);
				}
				String []numbers= line.trim().split(BigMathPreferences.WHITE_SPACE_REGEX);
				if(numbers.length!=Mcols) {
					throw new IOException("Invalid # of entries ("+numbers.length+")on line: "+r);
				}

				// First pass: parse the line and count the non-zero entries.
				int nonZero=0;
				for(int c=0;c<Mcols;c++) {
					float val = Float.parseFloat(numbers[c]);
					rowFloat[c] = val;
					if(val!=0)
						nonZero++;
				}

				// Second pass: pack the non-zero entries into a sparse row.
				BigSparseVector row=null;
				if(nonZero>0){
					float []currentRowVals = new float[nonZero];
					int []currentRowIdxs = new int[nonZero];
					int i=0;
					for(int c=0;c<Mcols;c++) {
						if(rowFloat[c]!=0) {
							currentRowIdxs[i] = c;
							currentRowVals[i] = rowFloat[c];
							i++;
						}
					}
					row = new BigSparseVector (currentRowIdxs,currentRowVals);
				}
				setRow(r, row);
			}
		} finally {
			inFp.close();
		}
	}

	/**
	 * Load a UMD format text file into this matrix. Each line holds a row id
	 * followed by (column id, value) pairs. A row id that already has data is
	 * reported with a warning and the new pairs are merged into the existing row.
	 *
	 * @param file the file
	 * @param setDiagonalElements when non-zero, the value stored at (row,row) for every row read
	 * @param progress two-element state carried between calls: [0] is the number of
	 *        lines read so far, [1] the start time in milliseconds; null starts fresh
	 *
	 * @return the progress array with [0] advanced by the number of lines read
	 *
	 * @throws IOException if the file cannot be read, a row id is outside the
	 *         matrix, or a column id is not followed by a value.
	 */
	public long[] loadUMDMatrixTextFile(File file, float setDiagonalElements, long []progress) throws IOException {
		if(progress==null)
		{
			progress = new long[2];
			progress[0]=0;
			progress[1]=System.currentTimeMillis();
		}
		long tic = progress[1];
		int r=(int) progress[0];

		BufferedReader inFp = new BufferedReader(new FileReader(file));
		try {
			String line = null;
			while(null!=(line=inFp.readLine())) {
				r++;
				if(r%1000==5 && BigMathPreferences.verbose) {
					printLoadProgress(r,tic);
				}

				String []numbers= line.split(UMD_DELIMITER_REGEX);
				// A valid line is a row id plus complete (column,value) pairs, i.e. an odd token count.
				if(numbers.length%2==0) {
					throw new IOException("Malformed UMD line (column without a value) Line: "+(r-progress[0])+" "+file.getAbsolutePath());
				}
				int rowID = Integer.parseInt(numbers[0]);
				if(rowID<0 || rowID>=rows.length) {
					throw new IOException("Row id out of range: "+rowID+" Line: "+(r-progress[0])+" "+file.getAbsolutePath());
				}

				BigSparseVector row =null;
				if(rows[rowID]==null) {
					// One slot per pair, plus one for the diagonal element when requested.
					if(setDiagonalElements!=0)
						row = new BigSparseVector ((numbers.length-1)/2+1);
					else
						row = new BigSparseVector ((numbers.length-1)/2);
				} else {
					System.out.println("WARNING: Dup row: "+rowID+" Line: "+(r-progress[0])+" "+file.getAbsolutePath());
					row = rows[rowID];
				}
				if(setDiagonalElements!=0)
					row.set(rowID,setDiagonalElements);
				for(int colIDOffset=1;colIDOffset<numbers.length;colIDOffset+=2) {
					int colID = Integer.parseInt(numbers[colIDOffset]);
					float val = Float.parseFloat(numbers[colIDOffset+1]);
					row.set(colID,val);
				}
				setRow(rowID, row);
			}
		} finally {
			inFp.close();
		}
		progress[0]=r;
		return progress;
	}

	/**
	 * Scan a UMD format text file without loading values: for every line the
	 * counter at the row id is incremented once, and the counter at each column id
	 * on that line is incremented once. The vector grows as needed; ids that never
	 * appear keep a null entry.
	 *
	 * @param file the file
	 * @param elements the per-id occurrence counters, updated in place
	 * @param progress two-element state carried between calls: [0] is the number of
	 *        lines read so far, [1] the start time in milliseconds; null starts fresh
	 *
	 * @return the progress array with [0] advanced by the number of lines read
	 *
	 * @throws IOException Signals that an I/O exception has occurred.
	 */
	static public long[] scanUMDMatrixTextFile(File file, Vector<Integer> elements, long []progress) throws IOException {
		if(progress==null)
		{
			progress = new long[2];
			progress[0]=0;
			progress[1]=System.currentTimeMillis();
		}
		long tic = progress[1];
		int r=(int) progress[0];

		BufferedReader inFp = new BufferedReader(new FileReader(file));
		try {
			String line = null;
			while(null!=(line=inFp.readLine())) {
				r++;
				if(r%1000==5 && BigMathPreferences.verbose) {
					float tdiff =(System.currentTimeMillis()-tic)/1000.f;
					System.out.println("Loading row: "+r+" \tElapsed Time:"+(tdiff)+" s");
					System.out.flush();
				}

				String []numbers= line.split(UMD_DELIMITER_REGEX);
				incrementCount(elements, Integer.parseInt(numbers[0]));
				// Tokens at odd offsets are column ids; the values between them are not needed in a scan.
				for(int colIDOffset=1;colIDOffset<numbers.length;colIDOffset+=2) {
					incrementCount(elements, Integer.parseInt(numbers[colIDOffset]));
				}
			}
		} finally {
			inFp.close();
		}
		progress[0]=r;
		return progress;
	}

	/**
	 * Increments the counter stored at {@code id}, growing the vector when the id
	 * is beyond its current size.
	 *
	 * @param counts the counters (null entries mean "not seen yet")
	 * @param id the index to increment
	 */
	private static void incrementCount(Vector<Integer> counts, int id) {
		if(id>=counts.size()) {
			// Grow geometrically, but always far enough to hold index id (2*0 would be 0).
			counts.setSize(Math.max(2*id, id+1));
		}
		Integer cnt = counts.get(id);
		if(cnt==null)
			counts.set(id, Integer.valueOf(1));
		else
			counts.set(id, Integer.valueOf(cnt.intValue()+1));
	}

	/**
	 * Gets the non empty count: the sum of {@code countNonEmpty()} over all non-null rows.
	 *
	 * @return the non empty count
	 */
	public long getNonEmptyCount() {
		long count=0;
		for(int i=0;i<rows.length;i++) {
			if(rows[i]!=null)
				count+=rows[i].countNonEmpty();
		}
		return count;
	}

	/**
	 * Sets the row direct. The vector is stored by reference.
	 *
	 * @param r the r
	 * @param row the row
	 */
	public void setRowDirect(int r, BigSparseVector row) {
		rows[r]=row;
	}

	/**
	 * Gets the row direct (the stored reference, not a copy).
	 * The indexAt is required to map location to column.
	 *
	 * @param r the r
	 *
	 * @return the row direct, or null when the row holds no data
	 */
	public BigSparseVector getRowDirect(int r) {
		return rows[r];
	}

	/**
	 * Gets the full matrix as a dense Nrows x Mcols array.
	 *
	 * @return the full matrix
	 */
	public float [][]getFullMatrix() {
		float [][]dat = new float[Nrows][Mcols];
		for(int i=0;i<Nrows;i++)
			for(int j=0;j<Mcols;j++)
				dat[i][j] = get(i, j);
		return dat;
	}

	/**
	 * Extract sub matrix by rows unsafe: the returned matrix shares the selected
	 * row vectors with this matrix (no cloning). The sticky row mapping of the
	 * result is NOT updated here; it keeps the identity mapping set by the constructor.
	 *
	 * @param selectRows the select rows
	 *
	 * @return the big sparse matrix
	 */
	public BigSparseMatrix extractSubMatrixByRowsUnsafe(BigVector selectRows) {
		BigSparseMatrix subMat = new BigSparseMatrix(selectRows.getLength(),Mcols);
		for(int i=0;i<selectRows.getLength();i++) {
			subMat.setRowDirect(i, rows[(int)selectRows.get(i)]);
		}
		return subMat;
	}

	/**
	 * Gets the n rows.
	 *
	 * @return the n rows
	 */
	public int getNRows() {
		return Nrows;
	}

	/**
	 * Gets the m cols.
	 *
	 * @return the m cols
	 */
	public int getMCols() {
		return Mcols;
	}

	/**
	 * Compute row column counts. Both result arrays have length max(Nrows,Mcols):
	 * {@code counts[0][i]} is the number of index entries in row i and
	 * {@code counts[1][j]} the number of rows with an index entry for column j.
	 *
	 * @return the int[][]
	 */
	public int[][] computeRowColumnCounts() {
		int [][]counts = new int[2][Math.max(Nrows,Mcols)];
		for(int i=0;i<Nrows;i++) {
			BigSparseVector row = rows[i];
			if(row!=null) {
				int []idx = row.getIndexDirect();
				for(int j=0;j<idx.length;j++) {
					counts[0][i]++;
					counts[1][idx[j]]++;
				}
			}
		}
		return counts;
	}

}
