package bl.coe.BigSparseMath;

import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.Vector;

/**
 * A growable sparse vector of floats.
 * 
 * Entries are kept in two parallel arrays ({@link #indexes} and {@link #values});
 * only the first {@link #n} slots of each are in use. Positions without a stored
 * entry read as 0. The vector has no concept of a fixed length: any position
 * indexed between 0...MAXINT may be set, and "length" refers to the number of
 * stored entries.
 * 
 * Invariant: the used part of {@link #indexes} must be sorted in ascending order
 * without duplicates, because {@link #get(int)} and {@link #set(int, float)}
 * locate entries with a binary search.
 * 
 * @author Bennett Landman, bennett.landman@vanderbilt.edu
 */
public class BigSparseVector implements BigVector {

	/** The Constant UUID. Written at the start of a stream to identify the file type. */
	private final static String UUID = "BSV(v1.0)"+0+"-54318644-8e17-41ee-86b4-556fac2ea437";

	/** Number of USED slots in the allocated arrays. */
	protected int n;	
	
	/** The vector positions of the stored entries (sorted ascending in the first n slots). */
	protected int indexes[];

	/** The stored values; values[i] belongs to position indexes[i]. */
	protected float values[];

	/** The growth factor. Fractional rate at which storage is enlarged when a set occurs beyond the allocated memory. */
	protected float growthFactor = 0.1f; // 10%
	
	/**
	 * Instantiates a new big sparse vector in which every array slot is in use.
	 * The arrays are copied.
	 * 
	 * @param cc the indexes for the values (must be sorted ascending, see class comment)
	 * @param vv the values at the specified indices.
	 */
	public BigSparseVector(int[] cc, float[] vv) {
		this(cc,vv,cc.length);
	}

	/**
	 * Instantiates a new big sparse vector from existing storage. The arrays are
	 * copied in full (including any unused capacity beyond length).
	 * 
	 * @param cc the indexes for the values (first length entries sorted ascending)
	 * @param vv the values at the specified indices
	 * @param length the number of leading slots of cc/vv that are in use
	 */
	public BigSparseVector(int[] cc, float[] vv, int length) {
		n = length;
		indexes = cc.clone();
		values = vv.clone();
	}

	/**
	 * Instantiates a new big sparse vector with 0 used indices and storage for
	 * length entries.
	 * 
	 * @param length the initial storage capacity
	 */
	public BigSparseVector(int length) {
		n = 0;
		indexes = new int[length];
		values = new float[length];
	}

	/**
	 * Instantiates a new big sparse vector holding the non-zero entries of a dense vector.
	 * 
	 * @param vec the dense vector to convert
	 */
	public BigSparseVector(BigDenseVector vec) {
		int cnt = vec.countNonEmpty();
		indexes = new int[cnt];
		values = new float[cnt];
		int used = 0;
		int denseLength = vec.getLength();
		for(int i=0;i<denseLength;i++) {
			float val = vec.get(i);
			if(val!=0) {
				indexes[used]=i;
				values[used]=val;
				used++;
			}
		}
		// Record the number of entries actually copied, so that n never covers
		// unfilled slots should countNonEmpty() report more than were found here.
		n = used;
	}

	/**
	 * Gets the value at index location c.
	 * 
	 * @param c the index location
	 * 
	 * @return the stored value, or 0 if nothing is stored at c
	 */
	public float get(int c) {
		if(n==0) {
			return 0;
		}
		int index = findEntryInSortedArray(indexes,n,c);
		return (index<0) ? 0 : values[index];
	}

	/**
	 * Find entry in sorted array. Used internally to BigSparseVector.
	 * 
	 * The "not found" result encodes the insertion point: for a negative return
	 * value x, the target belongs immediately before element (-x-length). The
	 * encoding cannot represent an empty list, so callers must handle
	 * length == 0 themselves (as get and set do).
	 * 
	 * @param sortedList the sorted (ascending) list of integers
	 * @param length the number of leading elements of the list to search (at least 1)
	 * @param target the target integer
	 * 
	 * @return x, the location of target in the sortedList (0 indexed), if the
	 * target is not in the list, then the target would fall before the -x-length element
	 * of the sortedList
	 */
	public static int findEntryInSortedArray(int[] sortedList, int length, int target) {
		int left = 0; 
		int leftV = sortedList[left];
		if(leftV==target) {
			return left;
		}
		if(leftV>target) {
			return -length; // belongs before element 0
		}
		int right = length-1;
		int rightV = sortedList[right];
		if(rightV==target) {
			return right;
		}
		if(rightV<target) {
			return -2*length; // belongs after the last element
		}
		// From here on: sortedList[left] < target < sortedList[right].
		while(right-left>1) {
			// Pure integer midpoint: going through float loses precision for
			// positions above 2^24, and left+right could overflow.
			int middle = left+(right-left)/2;
			int middleV = sortedList[middle];
			if(middleV==target) {
				return middle;
			}
			if(middleV>target) {
				right = middle;
			} else {
				left = middle;
			}
		}
		return -1*right-length; // not found; belongs before element 'right'
	}



	/**
	 * Sets the vector value. An existing entry at c is overwritten; otherwise a
	 * new entry is inserted in sorted position, enlarging the storage if it is full.
	 * 
	 * @param c the index location
	 * @param v the value
	 */
	public void set(int c, float v) {
		if(n==0) {
			// The storage may have been created with zero capacity.
			if(indexes.length==0 || values.length==0) {
				grow(1);
			}
			values[0] = v;
			indexes[0] = c;
			n = 1;
			return;
		}
		int index = findEntryInSortedArray(indexes,n,c);
		if(index>=0) {
			values[index]=v;
			return;
		}
		int insertBeforeLocation = -index-n;
		if(n>=Math.min(indexes.length, values.length)) {
			// The vector is full. Enlarge by the growth factor, but always by at
			// least one slot so that a very small growth factor cannot stall growth.
			int newN = (int)(Math.ceil((1+growthFactor)*n));
			grow(Math.max(newN, n+1));
		}
		// Shift the tail one slot to the right, then drop the new entry in the gap.
		int tailLength = n-insertBeforeLocation;
		System.arraycopy(indexes, insertBeforeLocation, indexes, insertBeforeLocation+1, tailLength);
		System.arraycopy(values, insertBeforeLocation, values, insertBeforeLocation+1, tailLength);
		indexes[insertBeforeLocation] = c;
		values[insertBeforeLocation] = v;
		n++;
	}

	/**
	 * Grow the internal storage. New memory is allocated and the used entries are copied.
	 * 
	 * @param newN the new maximum storage size (must be able to hold the entries in use)
	 * 
	 * @throws IllegalArgumentException if newN is smaller than the number of used entries
	 */
	protected void grow(int newN){
		if(newN<n) {
			throw new IllegalArgumentException("Requested storage "+newN+" cannot hold "+n+" entries.");
		}
		int []newIndexes = new int[newN];
		float []newValues = new float[newN];
		System.arraycopy(indexes, 0, newIndexes, 0, n);
		System.arraycopy(values, 0, newValues, 0, n);
		indexes=newIndexes;
		values=newValues;
	}

	/**
	 * Extract a sub vector.
	 * 
	 * The subset is sorted internally; an extracted entry is stored in the result
	 * at the position its index occupies in that SORTED subset (not at its
	 * original index).
	 * 
	 * @param subset the indexes subset of values to extract (not modified)
	 * 
	 * @return the big sparse vector holding the entries of this vector whose index is in subset
	 */
	public BigSparseVector extractSubVector(int[] subset) {
		int []mySubset = subset.clone();
		Arrays.sort(mySubset);
		BigSparseVector ret = new BigSparseVector(subset.length); //upper bound
		int subSetIndex = 0;
		int retIndex = 0;
		// Walk both sorted lists in step. Only the n used slots are valid entries.
		for(int myIndex=0; myIndex<n && subSetIndex<mySubset.length; myIndex++) {
			while(subSetIndex<mySubset.length && mySubset[subSetIndex]<indexes[myIndex]) {
				subSetIndex++;
			}
			if(subSetIndex<mySubset.length && mySubset[subSetIndex]==indexes[myIndex]) {
				ret.values[retIndex]=values[myIndex];
				ret.indexes[retIndex]=subSetIndex;
				retIndex++;
			}
		}
		ret.n=retIndex;
		return ret;
	}

	/** 
	 * Creates a copy of this vector. 
	 * 
	 * @return a new vector with a copy of this vector's data and growth factor. 
	 */
	@Override
	public BigSparseVector clone() {
		BigSparseVector copy = new BigSparseVector(indexes, values, n);
		copy.growthFactor = growthFactor;
		return copy;
	}

	/**
	 * Sets the growth factor which is the fractional rate at which memory 
	 * is allocated when more elements are added to a full vector. Values that
	 * are not positive and finite are ignored.
	 * 
	 * @param gf the new growth factor. Note 0.1 corresponds to 10%.
	 */
	public void setGrowthFactor(float gf) {
		// gf>0 is false for NaN; infinity would request an unbounded allocation.
		if(gf>0 && !Float.isInfinite(gf)) {
			growthFactor = gf;
		}
	}

	/** 
	 * Count the number of stored entries in this vector.
	 * 
	 * @return n the number of stored entries. 
	 * 
	 * @see bl.coe.BigSparseMath.BigVector#countNonEmpty()
	 */
	public int countNonEmpty() {
		return n;
	}

	/**
	 * Write the vector to a binary stream (with header). The layout is: the
	 * UUID bytes, a short with value 1, the entry count, then the entries as
	 * written by {@link #writeData(DataOutputStream)}.
	 * 
	 * @param outFp the out data stream
	 * 
	 * @throws IOException Signals that an I/O exception has occurred.
	 */
	public void write(DataOutputStream outFp) throws IOException {
		byte []uuid = UUID.getBytes();
		outFp.write(uuid);
		outFp.writeShort(1); // endianess check 

		outFp.writeInt(n);
		writeData(outFp);
	}


	/**
	 * Read the vector from a binary stream (with header), i.e. the counterpart
	 * of {@link #write(DataOutputStream)}. The stream is closed when the header
	 * is found to be invalid.
	 * 
	 * @param inFp the in fp
	 * 
	 * @return the big sparse vector
	 * 
	 * @throws IOException Signals that an I/O exception has occurred or that the header is invalid.
	 */
	public static BigSparseVector read(DataInputStream inFp) throws IOException {

		byte []uuid = UUID.getBytes();
		byte []bytes = new byte[uuid.length];
		inFp.readFully(bytes);
		if(!Arrays.equals(uuid, bytes)) {
			inFp.close();
			throw new IOException("Not a valid BSV file. UUID does not match.");
		}			

		if(inFp.readShort()!=1) {
			inFp.close();
			throw new IOException("Not a valid BSV file. Likely endianess mismatch.");
		}

		int newN = inFp.readInt();
		if(newN<0) {
			inFp.close();
			throw new IOException("Not a valid BSV file. Negative entry count: "+newN);
		}
		// The entries follow the header; without reading them the result would be empty.
		return readData(newN, inFp);
	}


	/**
	 * Write data (without a header): for each stored entry its index (int)
	 * followed by its value (float).
	 * 
	 * @param outFp the out fp
	 * 
	 * @throws IOException Signals that an I/O exception has occurred.
	 */
	public void writeData(DataOutputStream outFp) throws IOException {
		for(int i=0;i<n;i++) {
			outFp.writeInt(indexes[i]);
			outFp.writeFloat(values[i]);
		}		
	}

	/**
	 * Read data (without a header), in the layout produced by
	 * {@link #writeData(DataOutputStream)}.
	 * 
	 * @param newN the number of elements to be read
	 * @param inFp the in fp
	 * 
	 * @return the big sparse vector
	 * 
	 * @throws IOException Signals that an I/O exception has occurred.
	 */
	public static BigSparseVector readData(int newN, DataInputStream inFp) throws IOException {
		BigSparseVector vec = new BigSparseVector(newN); 
		for(int i=0;i<newN;i++) {
			vec.indexes[i]=inFp.readInt();
			vec.values[i]=inFp.readFloat();
		}
		vec.n=newN;
		return vec;
	}

	/**
	 * Gets the index associated with the internal reference position.
	 * 
	 * @param i the internal index
	 * 
	 * @return the vector index at the internal position
	 */
	public int getIndexAt(int i) {
		return indexes[i];
	}

	/**
	 * Gets the value at the internal position
	 * 
	 * @param i the internal reference position
	 * 
	 * @return the value at the internal reference position
	 */
	public float getValueAt(int i) {
		return values[i];
	}

	/**
	 * Load full vector text file. The format is one number per line; several
	 * numbers on a line are also accepted. Every number read occupies the next
	 * vector position (starting at 0); only non-zero numbers are stored.
	 * 
	 * Note: letters are treated as separators by the split pattern, so exponent
	 * notation such as 1e5 is not read as a single number.
	 * 
	 * @param file the file
	 * 
	 * @return the big sparse vector
	 * 
	 * @throws IOException Signals that an I/O exception has occurred.
	 * @throws NumberFormatException if a token cannot be parsed as a float
	 */
	static public BigSparseVector loadFullVectorTextFile(File file) throws IOException {
		Vector<Integer> foundIndexes = new Vector<Integer>();
		Vector<Float> foundValues = new Vector<Float>();
		BufferedReader inFp = new BufferedReader((new FileReader(file)));
		try {
			String line=null;
			int position=0; // position in the full vector, counts zeros as well
			while(null!=(line=inFp.readLine())) { 
				String []numbers= line.split("[ \t,;:a-zA-Z]+");
				for(int i=0;i<numbers.length;i++) {
					// Blank lines and leading separators yield empty tokens.
					if(numbers[i].trim().length()==0) {
						continue;
					}
					float val = Float.parseFloat(numbers[i]);
					if(val!=0.f) {
						foundIndexes.add(Integer.valueOf(position)); 
						foundValues.add(Float.valueOf(val));
					}
					position++;
				}
			}
		} finally {
			// Release the file even when parsing fails.
			inFp.close();
		}
		int count = foundIndexes.size();
		BigSparseVector vec = new BigSparseVector(count);
		for(int i=0;i<count;i++) {
			vec.indexes[i] = foundIndexes.get(i).intValue();
			vec.values[i] = foundValues.get(i).floatValue();
		}
		vec.n = count;
		return vec;
	}

	/** 
	 * Returns the number of entries stored in this vector. 
	 * 
	 * @return n the number of vector entries
	 * @see bl.coe.BigSparseMath.BigVector#getLength()
	 */
	public int getLength() {
		return n;
	}

	/**
	 * Gets the data directly (if possible). Otherwise, clone
	 * the data. The internal array is returned only when every slot is in use.
	 * 
	 * @return the data array of values
	 */
	public float[] getDataDirect() {
		if(n==values.length) {
			return values;
		}
		return Arrays.copyOf(values, n);
	}
	
	/**
	 * Gets the index directly (if possible). Otherwise, clone
	 * the data. The internal array is returned only when every slot is in use.
	 * 
	 * @return the index positions 
	 */
	public int[] getIndexDirect() {
		if(n==indexes.length) {
			return indexes;
		}
		return Arrays.copyOf(indexes, n);
	}
	
	/**
	 * Gets the data directly. 
	 * The returned vector maybe larger than the
	 * number of elements currently stored. 
	 * 
	 * @return the data direct unsafe
	 */
	public float[] getDataDirectUnsafe() {		
		return values;
	}

	/**
	 * Gets the largest index among the stored entries.
	 * 
	 * @return the largest stored index, or 0 if no stored index is greater than 0
	 */
	@Override
	public int getLastIndex() {
		int lastIndex = 0;
		for(int i=0;i<n;i++) {
			if(lastIndex<indexes[i]) {
				lastIndex=indexes[i];
			}
		}
		return lastIndex;
	}
}
