% This function reads USPS digit images.
% The input option structure must contain the following fields:
%	pathTodata     : path to the directory that contains 'usps_all.mat'
%	numfolds       : number of cross-validation folds
%	foldnum        : number of the fold that is held out for testing
%	digitsList     : list of digits to be read into the V matrix
%	numSampleList  : list of the number of samples to be read into the V matrix
%	                 for each element of 'digitsList'
%	downsampleFlag : true if downsampling is required (not implemented yet!)
%	classID        : list of class IDs
function [V,IndicesForBasis, IndicesForTrain,IndicesForTest] = ...
		 uspsdigits(options)
	% uspsdigits - read usps digits
	%
        CurrPath = pwd ;
        addpath(CurrPath) ;
    
	% Reduces the size of the images (by a factor 0.5) 
	% Set to 0 to avoid reducing. Set to 1 to reduce.
	reducesize = options.downsampleFlag;  if reducesize, error('downsampling is not implmented yet!'); end
    
	% This is where the usps digits images reside
	TotalNumSamples = 1100 ;
	thepath = options.pathTodata ;
	numfolds = options.numfolds ;
	foldnum = options.foldnum ;
	classid = options.classID ;
    digitsList = options.digitsList ;

	% load data
	load([thepath '/usps_all.mat'])
	NumBasisTrainPerclass = options.numSampleList ;
	ImgIndices = {} ;
	for fldr=1:length(NumBasisTrainPerclass)
		[IndicesForTrain{fldr},IndicesForTest{fldr}] =  CrossValidateIndices([1:TotalNumSamples],...
											numfolds,foldnum) ;
		if (length(IndicesForTrain{fldr}) < NumBasisTrainPerclass{fldr} )
			error('requested number of samples for basis -learning process is more than number of samples per fold !!!!') ;
		end
		IndicesForBasis{fldr} = IndicesForTrain{fldr}(1:NumBasisTrainPerclass{fldr}) ;
	end	


	% Create the data matrix
	if reducesize, V = zeros(8*8,30); 
	else V = zeros(16*16,30); 
	end

	% Step through each subject and each image
	fprintf('Reading in the images...\n');
	i = 0;
	for fldr=1:length(NumBasisTrainPerclass)
		for imgcnt=IndicesForBasis{fldr}
			i = i + 1 ;
			V(:,i) = data(:,imgcnt,digitsList{fldr}) ;
		end
		fprintf('[%s]',classid{fldr})
    	end
	fprintf('\n');

	% Same preprocessing as Stan Li et al
	minval = min(V);
	V = V - ones(size(V,1),1)*minval;
	maxval = max(V);
	V = (V*255) ./ (ones(size(V,1),1)*maxval);

	% Additionally, this is required to avoid having any exact zeros:
	% (divergence objective cannot handle them!)
	V = max(V,1e-4);

	% Finally, divide by 10000 to avoid too large values for nmfsc algorithm
	V = V/10000;

	% Done!
    cd(CurrPath)
end

function [train_Indices,test_Indices] =  CrossValidateIndices(S,numfolds,foldnum) 
	start_index = floor(length(S)/numfolds*(foldnum-1))+1 ;
	end_index = min(start_index + ceil(length(S)/numfolds),length(S)) ;
	test_Indices = S(start_index:end_index) ;
	train_Indices = setdiff(S,test_Indices) ;
end
