/* Version-control ($Id$ keyword) identification string for this file. */
static const char rcsid[] = "$Id: bxh_correlate.c,v 1.17 2009-01-15 20:55:19 gadde Exp $";

/*
 * bxh_correlate.c --
 * 
 *  For a given 4-D data set, correlate each voxel's time series
 *  with a given 1-D template vector.  Output is a 3-D data set
 *  storing the correlation coefficient (r) for each voxel.
 *  Modeled after portions of tstatprofile2.m by Josh Bizzell.
 *  
 */

#include <bxh_config.h>

#include <stdio.h>

#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#ifndef WIN32
#include <unistd.h>
#endif
#include <math.h>

#include "bxh_niftilib.h"
#include "bxh_datarec.h"
#include "opts.h"

/*
 * Fallback for the version string printed by the --version option,
 * used when XMLH_VERSIONSTR has not already been defined.
 */
#ifndef XMLH_VERSIONSTR
#define XMLH_VERSIONSTR "(no version specified)"
#endif

int
main(int argc, char *argv[])
{
    int retval = 0;
    struct stat statbuf;
    const char * inputfile = NULL;
    char * outputbase_r = NULL;
    char * outputbxh_r = NULL;
    char * outputfile_r = NULL;
    char * outputfilegz_r = NULL;
    char * outputbase_t = NULL;
    char * outputbxh_t = NULL;
    char * outputfile_t = NULL;
    char * outputfilegz_t = NULL;
    const char * opt_select[] = { ":", ":", ":", ":" };
    char * opt_template = NULL;
    char * opt_templatevoxel = NULL;
    char * opt_maskfile = NULL;
    char * opt_optsfromfile = NULL;
    int opt_overwrite = 0;
    int opt_version = 0;

    const char * ordereddimnames[] = { "x", "y", "z", "t" };
    const char * mask_select[] = { ":", ":", ":" };
    struct bxhdataread bdr;
    struct bxhdataread maskbdr;
    double * templ = NULL;
    size_t templsize = 0;
    size_t * dimsizes = NULL;
    size_t * pagesizes = NULL;
    double * dataptr = NULL;
    char * maskdataptr = NULL;
    BXHDocPtr docp = NULL;
    bxhrawdatarec * outdatarec = NULL;

    const int numopts = 12;
    opt_data opts[12] = {
	{ 0x0, OPT_VAL_NONE, NULL, 0, "",
	  "Usage:\n"
	  "  bxh_correlate [opts] --template T1,T2,T3... inputxmlfile out_rfile [out_tfile]\n\n"
	  "This program correlates the time series of each voxel in a "
	  "4-D time series of volumes (inputxmlfile) "
	  "with a given \"template\" vector "
	  "(specified with --template option).  "
	  "Output (in out_rfile) is a 3-D data set storing the "
	  "correlation coefficient (r).  "
	  "The optional third argument (out_tfile) is where to write "
	  "the 3-D data set storing the corresponding t-statistic "
	  "(derived from r)." },
	{ 0x0, OPT_VAL_NONE, NULL, 0, "", "" },
	{ OPT_FLAGS_FULL, OPT_VAL_BOOL, &opt_version, 1, "version",
	  "Print version string and exit." },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_optsfromfile, 1, "optsfromfile",
	  "Program options (i.e. those starting with '--') will come from "
	  "this file.  "
	  "If this option is specified, then the options in the file "
	  "will be applied after all command-line options.  "
	  "The options (and their arguments) should be specified "
	  "one per line, with the leading '--' omitted." },
	{ OPT_FLAGS_FULL, OPT_VAL_BOOL, &opt_overwrite, 1, "overwrite",
	  "Overwrite existing output files (otherwise error and exit). " },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_select[3], 1, "timeselect",
	  "Comma-separated list of timepoints to use (first timepoint is 0).  "
	  "Any timepoint can be a contiguous range, specified as two "
	  "numbers separated by a colon, i.e. 'START:END'.  "
	  "An empty END implies the last timepoint.  "
	  "The default step of 1 (one) in ranges can be changed using "
	  "'START:STEP:END', which is equivalent to "
	  "'START,START+STEP,START+(2*STEP),...,END'.  "
	  "Default is all timepoints (:)." },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_select[0], 1, "xselect",
	  "Just like timeselect, but for the 'x' dimension." },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_select[1], 1, "yselect",
	  "Just like timeselect, but for the 'y' dimension." },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_select[2], 1, "zselect",
	  "Just like timeselect, but for the 'z' dimension." },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_template, 1, "template",
	  "A comma-separated list of numbers making up the template "
	  "vector to correlate with the data.  "
	  "This option or --templatevoxel is required." },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_templatevoxel, 1, "templatevoxel",
	  "A comma-separated x,y,z coordinate (indices start at 0) "
	  "indicating which voxel in the dataset to which to do the "
	  "correlation.  The value at that voxel in the output will be 1.0.  "
	  "This option or --template is required." },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_maskfile, 1, "maskfile",
	  "Use this 3-D mask (should be an XML file) before doing "
	  "calculations." }
    };

    memset(&bdr, '\0', sizeof(bdr));
    memset(&maskbdr, '\0', sizeof(maskbdr));

    argc -= opt_parse(argc, argv, numopts, &opts[0], 0);
    if (opt_optsfromfile) {
	opt_parsefile(opt_optsfromfile, numopts, &opts[0], 0);
    }

    if (opt_version) {
	fprintf(stdout, "%s\n", XMLH_VERSIONSTR);
	exit(0);
    }
    if (argc < 3 || argc > 4 || (opt_template == NULL && opt_templatevoxel == NULL)) {
	fprintf(stderr, "Usage: %s --template T1,T2,T3... xmlfile out_rfile [out_tfile]\n       %s --templatevoxel X,Y,Z xmlfile out_rfile [out_tfile]\n out_rfile will hold the correlation coefficient (r)\n out_tfile, if specified, will hold the t-statistic (derived from r)\n", argv[0], argv[0]);
	fprintf(stderr, "Use the --help option for more help.\n");
	goto FAIL;
    }

    inputfile = argv[1];

    outputbase_r = (char *)malloc(sizeof(char)*(strlen(argv[2]) + 1));
    outputbxh_r = (char *)malloc(sizeof(char)*(strlen(argv[2]) + 5));
    outputfile_r = (char *)malloc(sizeof(char)*(strlen(argv[2]) + 5));
    outputfilegz_r = (char *)malloc(sizeof(char)*(strlen(argv[2]) + 8));
    strcpy(outputbxh_r, argv[2]);
    {
	char * extpos = NULL;
	extpos = strrchr(outputbxh_r, '.');
	if (extpos == NULL) {
	    /* no extension on output */
	    strcpy(outputbase_r, outputbxh_r);
	    strcpy(outputfile_r, outputbxh_r);
	    strcpy(outputfilegz_r, outputbxh_r);
	    strcat(outputfile_r, ".nii");
	    strcat(outputfilegz_r, ".nii.gz");
	} else {
	    size_t baselen = (extpos - outputbxh_r);
	    strncpy(outputbase_r, outputbxh_r, baselen);
	    strncpy(outputfile_r, outputbxh_r, baselen);
	    strncpy(outputfilegz_r, outputbxh_r, baselen);
	    outputbase_r[baselen] = '\0';
	    strcpy(outputfile_r + baselen, ".nii");
	    strcpy(outputfilegz_r + baselen, ".nii.gz");
	}
    }
    if (!opt_overwrite) {
	if (stat(outputfilegz_r, &statbuf) == 0) {
	    fprintf(stderr, "%s: output file '%s' exists.\n", argv[0], outputfilegz_r);
	    goto FAIL;
	}
	if (stat(outputbxh_r, &statbuf) == 0) {
	    fprintf(stderr, "%s: output file '%s' exists.\n", argv[0], outputbxh_r);
	    goto FAIL;
	}
    }

    if (argc == 4) {
	outputbase_t = (char *)malloc(sizeof(char)*(strlen(argv[3]) + 1));
	outputbxh_t = (char *)malloc(sizeof(char)*(strlen(argv[3]) + 5));
	outputfile_t = (char *)malloc(sizeof(char)*(strlen(argv[3]) + 5));
	outputfilegz_t = (char *)malloc(sizeof(char)*(strlen(argv[3]) + 8));
	strcpy(outputbxh_t, argv[3]);
	{
	    char * extpos = NULL;
	    extpos = strrchr(outputbxh_t, '.');
	    if (extpos == NULL) {
		/* no extension on output */
		strcpy(outputbase_t, outputbxh_t);
		strcpy(outputfile_t, outputbxh_t);
		strcpy(outputfilegz_t, outputbxh_t);
		strcat(outputfile_t, ".nii");
		strcat(outputfilegz_t, ".nii.gz");
	    } else {
		size_t baselen = (extpos - outputbxh_t);
		strncpy(outputbase_t, outputbxh_t, baselen);
		strncpy(outputfile_t, outputbxh_t, baselen);
		strncpy(outputfilegz_t, outputbxh_t, baselen);
		outputbase_t[baselen] = '\0';
		strcpy(outputfile_t + baselen, ".nii");
		strcpy(outputfilegz_t + baselen, ".nii.gz");
	    }
	}
	if (!opt_overwrite) {
	    if (stat(outputfilegz_t, &statbuf) == 0) {
		fprintf(stderr, "%s: output file '%s' exists.\n", argv[0], outputfile_t);
		goto FAIL;
	    }
	    if (stat(outputbxh_t, &statbuf) == 0) {
		fprintf(stderr, "%s: output file '%s' exists.\n", argv[0], outputbxh_t);
		goto FAIL;
	    }
	}
    }

    /* parse template */
    if (opt_template) {
	char * curpos = opt_template;
	templsize = 0;
	while (*curpos != '\0') {
	    size_t valuelen = 0;
	    char * comma = NULL;
	    char * endptr = NULL;
	    comma = strchr(curpos, ',');
	    if (comma) {
		valuelen = comma - curpos;
		*comma = '\0';
	    } else {
		valuelen = strlen(curpos);
	    }
	    templ = (double *)realloc(templ, sizeof(double)*(templsize+1));
	    templ[templsize] = strtod((char *)curpos, &endptr);
	    templsize++;
	    if (*endptr != '\0') {
		fprintf(stderr,"Error: bad template value %s\n", (char *)curpos);
		free(curpos);
		goto FAIL;
	    }
	    if (comma) {
		*comma = ',';
		curpos = comma + 1;
	    } else {
		curpos += valuelen;
	    }
	}
    }

    if (bxh_dataReadFileStart(inputfile, "image", NULL, 4, ordereddimnames, opt_select, &bdr) != 0) {
	fprintf(stderr, "Error preparing data read for '%s'.\n", inputfile);
	goto FAIL;
    }
    if (bdr.datarec->numdims != 4) {
	fprintf(stderr, "Data must be 4-dimensional.\n");
	goto FAIL;
    }
    if (bdr.datarec->dimensions[3].size == 0) {
	fprintf(stderr, "Number of time points must be greater than 0!\n");
	goto FAIL;
    }
    if (opt_template && bdr.datarec->dimensions[3].size != templsize) {
	fprintf(stderr, "Number of time points in data and template must match!\n");
	goto FAIL;
    }

    if (opt_maskfile) {
	if (bxh_dataReadFileStart(opt_maskfile, "image", NULL, 3, ordereddimnames, mask_select, &maskbdr) != 0) {
	    fprintf(stderr, "Error preparing data read for '%s'.\n", inputfile);
	    goto FAIL;
	}
	if (maskbdr.datarec->numdims != 3) {
	    fprintf(stderr, "Mask must be 3-dimensional.\n");
	    goto FAIL;
	}
	if (memcmp(maskbdr.dimsizes, bdr.dimsizes, sizeof(maskbdr.dimsizes[0]) * 3) != 0) {
	    fprintf(stderr, "Mask spatial dimensions do not match data dimensions.\n");
	    goto FAIL;
	}
    }
    
    if (bxh_dataReadFinish(&bdr, "double") != 0) {
	fprintf(stderr, "Error finishing data read for '%s'.\n", inputfile);
	goto FAIL;
    }
    if (opt_maskfile) {
	if (bxh_dataReadFinish(&maskbdr, "char") != 0) {
	    fprintf(stderr, "Error finishing data read for '%s'.\n", opt_maskfile);
	    goto FAIL;
	}
    }

    dataptr = (double *)bdr.dataptr;
    if (opt_maskfile)
	maskdataptr = (char *)maskbdr.dataptr;
    dimsizes = bdr.dimsizes;
    pagesizes = bdr.pagesizes;
    docp = bdr.docp;

    /* now that we've read the data, read the template voxel data if needed */
    if (opt_templatevoxel) {
	size_t indt;
	size_t seed[3];
	size_t seedxyz;
	int dimnum = 0;
	char * curpos = opt_templatevoxel;
	templsize = 0;
	while (*curpos != '\0' && dimnum < 3) {
	    size_t valuelen = 0;
	    char * comma = NULL;
	    char * endptr = NULL;
	    comma = strchr(curpos, ',');
	    if (comma) {
		valuelen = comma - curpos;
		*comma = '\0';
	    } else {
		valuelen = strlen(curpos);
	    }
	    seed[dimnum] = strtod((char *)curpos, &endptr);
	    dimnum++;
	    if (*endptr != '\0') {
		fprintf(stderr,"Error: bad template value %s\n", (char *)curpos);
		free(curpos);
		goto FAIL;
	    }
	    if (comma) {
		*comma = ',';
		curpos = comma + 1;
	    } else {
		curpos += valuelen;
	    }
	}
	if (dimnum != 3) {
	    fprintf(stderr,"Error: template voxel '%s' must be in the form X,Y,Z\n", (char *)opt_templatevoxel);
	    goto FAIL;
	}
	seedxyz = seed[0] + (seed[1] * pagesizes[0]) + (seed[2] * pagesizes[1]);
	templ = (double *)realloc(templ, sizeof(double)*dimsizes[3]);
	for (indt = 0; indt < dimsizes[3]; indt++) {
	    templ[indt] = dataptr[(indt * pagesizes[2]) + seedxyz];
	}
	templsize = dimsizes[3];
    }

    /*** Do the dirty work here ***/
    {
	/*** Calculate correlation to template for each voxel ***/
	double * results = (double *)malloc(sizeof(double)*pagesizes[2]);
	float * tempresults = NULL;
	double ssxy = 0;
	double ssxx = 0;
	double ssyy = 0;
	double meanx = 0;
	double meany = 0;
	double oldmeanx = 0;
	double oldmeany = 0;
	size_t voxelnum = 0; /* within volume */
	size_t t = 0;
	bxhrawdatarec * tmpdatarec = NULL;

	/* first calculate r */

	/* do template first -- we use the same template for all voxels */
	meany = 0;
	ssyy = 0;
	for (t = 0; t < dimsizes[3]; t++) {
	    double newpoint = templ[t];
	    oldmeany = meany;
	    meany += (newpoint - oldmeany) / (t + 1);
	    ssyy += (newpoint - oldmeany) * (newpoint - meany);
	}

	/* now do each voxel */
	memset(results, '\0', sizeof(double)*pagesizes[2]);
	for (voxelnum = 0; voxelnum < pagesizes[2]; voxelnum++) {
	    if (maskdataptr && maskdataptr[voxelnum] == 0)
		continue;
	    meanx = 0;
	    for (t = 0; t < dimsizes[3]; t++) {
		size_t ind = (t * pagesizes[2]) + voxelnum;
		double newpoint = dataptr[ind];
		oldmeanx = meanx;
		meanx += (newpoint - oldmeanx) / (t + 1);
	    }
	    ssxx = 0;
	    ssxy = 0;
	    for (t = 0; t < dimsizes[3]; t++) {
		size_t ind = (t * pagesizes[2]) + voxelnum;
		double newpoint = dataptr[ind];
		double xdev = newpoint - meanx;
		double ydev = templ[t] - meany;
		ssxx += xdev * xdev;
		ssxy += xdev * ydev;
	    }
	    if (ssxy == 0) {
		results[voxelnum] = 0;
	    } else {
		results[voxelnum] = ssxy / sqrt(ssxx * ssyy);
	    }
	}

	/* write out results */
	tempresults = bxh_convertBufToFloat(results, sizeof(double)*pagesizes[2], "double");
	outdatarec = bxh_datarec_copy(bdr.datarec);
	while (outdatarec->numdims > 3) {
	    outdatarec->numdims--;
	    bxh_datarec_dimdata_free(&outdatarec->dimensions[outdatarec->numdims]);
	}
	outdatarec->numdims = 3;
	free(outdatarec->elemtype);
	outdatarec->elemtype = strdup("float32");
	bxh_datarec_frags_free(outdatarec);
	bxh_datarec_addfrag(outdatarec, outputfile_r, 0, sizeof(float) * pagesizes[2], outputbxh_r, 1);
	if (bxh_datarec_writeToElement(bdr.imagedatap, outdatarec) != 0) {
	    fprintf(stderr, "Failed writing datarec\n");
	    goto FAIL;
	}
	tmpdatarec = bdr.datarec;
	bdr.datarec = outdatarec;
	if (bxh_addAutoHistoryEntry(docp, argv[0], &inputfile, 1) != 0) {
	    fprintf(stderr, "Error adding history entry\n");
	    goto FAIL;
	}
	writeBXHAndNIIGZ(outputbase_r, &bdr, tempresults, 0);
	bdr.datarec = tmpdatarec; tmpdatarec = NULL;

	free(tempresults); tempresults = NULL;
	bxh_datarec_free(outdatarec); outdatarec = NULL;

	/* now calculate t, if requested */
	if (outputfile_t) {
	    double sn2 = sqrt(dimsizes[3]-2);
	    for (voxelnum = 0; voxelnum < pagesizes[2]; voxelnum++) {
		double newpoint = results[voxelnum]; 
		if (maskdataptr && maskdataptr[voxelnum] == 0)
		    continue;
		results[voxelnum] =
		    (newpoint * sn2) / sqrt(1 - (newpoint * newpoint));
	    }

	    /* write out results */
	    tempresults = bxh_convertBufToFloat(results, sizeof(double)*pagesizes[2], "double");
	    outdatarec = bxh_datarec_copy(bdr.datarec);
	    while (outdatarec->numdims > 3) {
		outdatarec->numdims--;
		bxh_datarec_dimdata_free(&outdatarec->dimensions[outdatarec->numdims]);
	    }
	    outdatarec->numdims = 3;
	    free(outdatarec->elemtype);
	    outdatarec->elemtype = strdup("float32");
	    bxh_datarec_frags_free(outdatarec);
	    bxh_datarec_addfrag(outdatarec, outputfile_t, 0, sizeof(float) * pagesizes[2], outputbxh_t, 1);
	    if (bxh_datarec_writeToElement(bdr.imagedatap, outdatarec) != 0) {
		fprintf(stderr, "Failed writing datarec\n");
		goto FAIL;
	    }
	    tmpdatarec = bdr.datarec;
	    bdr.datarec = outdatarec;
	    if (bxh_addAutoHistoryEntry(docp, argv[0], &inputfile, 1) != 0) {
		fprintf(stderr, "Error adding history entry\n");
		goto FAIL;
	    }
	    writeBXHAndNIIGZ(outputbase_t, &bdr, tempresults, 0);
	    bdr.datarec = tmpdatarec; tmpdatarec = NULL;

	    free(tempresults); tempresults = NULL;
	    bxh_datarec_free(outdatarec); outdatarec = NULL;
	}

	free(results);
    }

    goto EXIT;

  FAIL:
    retval = -1;

  EXIT:
    bxh_datareaddata_free(&bdr);
    if (opt_maskfile)
	bxh_datareaddata_free(&maskbdr);
    if (templ)
	free(templ);
    if (outputbase_r)
	free(outputbase_r);
    if (outputbxh_r)
	free(outputbxh_r);
    if (outputfile_r)
	free(outputfile_r);
    if (outputfilegz_r)
	free(outputfilegz_r);
    if (outputbase_t)
	free(outputbase_t);
    if (outputbxh_t)
	free(outputbxh_t);
    if (outputfile_t)
	free(outputfile_t);
    if (outputfilegz_t)
	free(outputfilegz_t);
    if (outdatarec)
	bxh_datarec_free(outdatarec);
    if (opt_maskfile) {
	free(opt_maskfile); opt_maskfile = NULL;
    }
    if (opt_optsfromfile) {
	free(opt_optsfromfile); opt_optsfromfile = NULL;
    }
    if (opt_template) {
	free(opt_template); opt_template = NULL;
    }
    if (opt_templatevoxel) {
	free(opt_templatevoxel); opt_templatevoxel = NULL;
    }
    return retval;
}

/*
 * $Log: In-line log eliminated on transition to SVN; use svn log instead. $
 * Revision 1.16  2007/12/10 16:40:04  gadde
 * Write out gzipped NIFTI (with BXH/XCEDE headers) as default
 *
 * Revision 1.15  2006/06/01 20:16:50  gadde
 * const fixes
 *
 * Revision 1.14  2006/03/21 16:43:19  gadde
 * Don't produce NaN when inputs are 0
 *
 * Revision 1.13  2006/03/02 21:48:33  gadde
 * Fix fprintf call.
 *
 * Revision 1.12  2006/02/23 17:47:26  gadde
 * Add option to grab template from a voxel in the data itself.
 *
 * Revision 1.11  2005/09/20 18:37:55  gadde
 * Updates to versioning, help and documentation, and dependency checking
 *
 * Revision 1.10  2005/09/19 16:31:56  gadde
 * Documentation and help message updates.
 *
 * Revision 1.9  2005/09/14 15:12:46  gadde
 * Some -Wall fixes.
 *
 * Revision 1.8  2005/09/14 14:49:30  gadde
 * Type conversion updates to fix win32 warnings
 *
 * Revision 1.7  2004/12/13 20:07:52  gadde
 * Initialize bdr before first FAIL.
 *
 * Revision 1.6  2004/12/13 20:06:39  gadde
 * Remove redundant initialization.
 *
 * Revision 1.5  2004/12/13 19:34:00  gadde
 * Fix --overwrite option.
 *
 * Revision 1.4  2004/12/13 19:23:40  gadde
 * Add --overwrite option.
 *
 * Revision 1.3  2004/12/09 16:42:36  gadde
 * Initialize bdr/maskbdr to 0.
 *
 * Revision 1.2  2004/11/15 14:41:22  gadde
 * Add ability to read options from file, and update usage info.
 *
 * Revision 1.1  2004/11/12 15:03:47  gadde
 * Initial commit.
 *
 */
