static const char rcsid[] = "$Id: fmriqa_spikiness.cpp,v 1.33 2009-02-17 18:34:35 gadde Exp $";

/*
 * fmriqa_spikiness.cpp --
 * 
 *  calculate a spikiness metric for each voxel in the given data.
 *  Thanks to Nate White and Doug Greve for the default method
 *  (mean detrend + jackknife).
 */

#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#ifndef WIN32
#include <unistd.h>
#endif
#include <math.h>
#include <string.h>

#include <vector>

#include <f2c.h>

/*
 * MYNAN is the quiet "not a number" value this program uses to mark
 * voxels/slices that must be ignored (e.g. zero standard deviation);
 * such values are later detected with isnan().
 */
#ifdef WIN32
#include <pstdint.h>
#include <float.h>
#ifndef NAN
/* no NAN macro available: reinterpret an all-ones-exponent, non-zero-mantissa
 * 32-bit pattern as a float */
static const uint32_t nan[1] = {0x7fffffff};
#define NAN (*(const float *) &nan[0])
#endif
#define MYNAN NAN
#ifndef isnan
/* fall back to the _isnan() function when no isnan macro is provided */
#define isnan(x) _isnan(x)
#endif
#else /* #ifdef WIN32 */
/* this works on Linux */
/* (0.0/0.0) is used rather than NAN because, per the revision log at the
 * end of this file, isnan(NAN) did not work correctly in LSB builds */
#define MYNAN (0.0/0.0)
#endif

#include "bxh_niftilib.h"
#include "bxh_datarec.h"
#include "opts.h"

#ifndef XMLH_VERSIONSTR
#define XMLH_VERSIONSTR "(no version specified)"
#endif

#ifndef M_PI
#define M_PI 3.14159265358979323846264338327950288
#endif

/*
 * cl1_ -- L1 (least absolute deviation) fitting routine, an f2c
 * translation of algorithm TOMS 552 (see cl1-552.c for full parameter
 * details).  As used in this file:
 *   k            number of points in the vector being fit
 *   l, m         number of equality / inequality constraints (both 0 here)
 *   n            number of terms (columns) in the fit
 *   klmd, klm2d, nklmd, n2d
 *                array dimensions: k+l+m, k+l+m+2, n+k+l+m and n+2
 *   q            input matrix, klm2d x n2d, column-major (overwritten)
 *   kode         status; 0 on return is treated as success
 *   toler        numerical tolerance
 *   iter         iteration limit (overwritten)
 *   x            output solution (fit parameters)
 *   res          output residuals
 *   error        output (overwritten by the call)
 *   cu, iu, s    workspace arrays
 * It is declared with C linkage because it is compiled as C.
 */
#ifdef __cplusplus
extern "C" {
#endif
extern int cl1_(integer *k, integer *l, integer *m, integer *n, 
		integer *klmd, integer *klm2d, integer *nklmd,
		integer *n2d, real *q, integer *kode, real *toler,
		integer *iter, real *x, real *res, real * error, real *cu,
		integer *iu, integer *s);
#ifdef __cplusplus
}
#endif

/*
 * CONVERTTEMPLATE --
 *
 *  Allocate a new array of 'totype' and fill it with the elements of
 *  'inbuf' (interpreted as an array of 'fromtype' occupying 'bufsize'
 *  bytes), converting each element with a cast.  The new array is stored
 *  in 'retbuf'; the caller owns it and must free() it.
 *
 *  Only whole elements are converted: any trailing bytes that do not make
 *  up a complete 'fromtype' are ignored, so the loop can never read or
 *  write past either buffer.  If the allocation fails, an error is printed
 *  and 'retbuf' is set to NULL without touching memory.
 */
#define CONVERTTEMPLATE(inbuf, fromtype, bufsize, retbuf, totype) {	\
    const fromtype * buf = (const fromtype *)(inbuf);			\
    size_t numelems = (bufsize)/sizeof(fromtype);			\
    size_t retsize = sizeof(totype)*numelems;				\
    size_t elemnum = 0;							\
    totype * newbuf = (totype *)malloc(retsize);			\
    (retbuf) = newbuf;							\
    if (newbuf == NULL) {						\
	fprintf(stderr, "Error allocating %lld bytes\n", (long long int)retsize);	\
    } else {								\
	for (elemnum = 0; elemnum < numelems; elemnum++) {		\
	    newbuf[elemnum] = (totype)buf[elemnum];			\
	}								\
    }									\
}

/*
 * convertBufToFloat --
 *
 *  Return a newly allocated float array holding the contents of 'inbuf'
 *  ('bufsize' bytes of elements whose type is named by 'elemtype')
 *  converted to float.  Recognized type names are "int8", "uint8",
 *  "int16", "uint16", "int32", "uint32", "float32", "float64" and
 *  "double".
 *
 *  Returns NULL if 'elemtype' is not recognized or if memory could not be
 *  allocated.  The caller must free() the returned buffer.
 */
static float *
convertBufToFloat(const void * inbuf, size_t bufsize, const char * elemtype)
{
    float * retbuf = NULL;
    if (strcmp(elemtype, "int8") == 0) {
	/* plain char may be unsigned on some platforms; int8 data is signed */
	CONVERTTEMPLATE(inbuf, signed char, bufsize, retbuf, float);
    } else if (strcmp(elemtype, "uint8") == 0) {
	CONVERTTEMPLATE(inbuf, unsigned char, bufsize, retbuf, float);
    } else if (strcmp(elemtype, "int16") == 0) {
	CONVERTTEMPLATE(inbuf, short, bufsize, retbuf, float);
    } else if (strcmp(elemtype, "uint16") == 0) {
	CONVERTTEMPLATE(inbuf, unsigned short, bufsize, retbuf, float);
    } else if (strcmp(elemtype, "int32") == 0) {
	CONVERTTEMPLATE(inbuf, int, bufsize, retbuf, float);
    } else if (strcmp(elemtype, "uint32") == 0) {
	CONVERTTEMPLATE(inbuf, unsigned int, bufsize, retbuf, float);
    } else if (strcmp(elemtype, "float32") == 0) {
	/* already float: a straight copy is enough */
	retbuf = (float *)malloc(bufsize);
	if (retbuf == NULL) {
	    fprintf(stderr, "Error allocating %lld bytes\n", (long long int)bufsize);
	} else {
	    memcpy(retbuf, inbuf, bufsize);
	}
    } else if (strcmp(elemtype, "float64") == 0) {
	CONVERTTEMPLATE(inbuf, double, bufsize, retbuf, float);
    } else if (strcmp(elemtype, "double") == 0) {
	CONVERTTEMPLATE(inbuf, double, bufsize, retbuf, float);
    }
    return retbuf;
}

int
main(int argc, char *argv[])
{
    int retval = 0;
    struct stat statbuf;
    const char * inputfile = NULL;
    const char * outputarg = NULL;
    char * outputbase = NULL;
    char * outputfile = NULL;
    char * outputbxh = NULL;
    int dimnum;
    int msbfirst = 1;
    FILE * fp = NULL;
    char * extpos = NULL;
    int opt_overwrite = 0;
    int opt_verbose = 0;
    const char * ordereddimnames[] = { "x", "y", "z", "t" };
    const char * opt_select[4] = {":", ":", ":", ":"};
    const char * opt_metric = "jackknife";
    double opt_brainthresh = -HUGE_VAL;
    const char * opt_fit_method = "mean";
    int opt_version = 0;
    struct bxhdataread bdr;
    float * dataptr = NULL;
    double * sums = NULL;
    size_t skipped = 0;
    float val;
    
    const int numopts = 12;
    opt_data opts[12] = {
	{ 0x0, OPT_VAL_NONE, NULL, 0, "",
	  "Usage:\n"
	  "  fmriqa_spikiness [opts] xmlfile [outputbase]\n\n"
	  "This program is usually called by wrapper scripts, and "
	  "may not be useful to users on its own.  "
	  "This program takes a 4-D BXH- or XCEDE- wrapped dataset and "
	  "calculates a 'spikiness' metric.  Various 'spikiness' metrics "
	  "are available and are selected using options.  "
	  "The size and meaning of the output data is dependent on the "
	  "metric being calculated."
	},
	{ 0x0, OPT_VAL_NONE, NULL, 0, "", "" },
	{ OPT_FLAGS_FULL, OPT_VAL_BOOL, &opt_overwrite, 1, "overwrite",
	  "Overwrite output files if they exist." },
	{ OPT_FLAGS_FULL, OPT_VAL_BOOL, &opt_version, 1, "version",
	  "Print version string and exit." },
	{ OPT_FLAGS_FULL, OPT_VAL_BOOL, &opt_verbose, 1, "verbose",
	  "More diagnostic output." },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_metric, 1, "metric",
	  "Which metric to return after fitting/detrending data.\n"
	  "'diff' returns (value-fit) per voxel.\n"
	  "'zscore' returns (value-fit)/stddev per voxel.\n"
	  "'abszscore' returns (value-fit)/stddev per voxel.\n"
	  "'afni' returns abs(value-fit)/mstddev per voxel "
	  "(i.e. same as returned by Robert Cox's AFNI 3dDespike) "
	  "where mstddev is a modified standard deviation "
	  "that is less influenced by outlier points.\n"
	  "'abszscoreslice' returns average abs(value-fit)/stddev per slice.\n"
	  "'jackknife' (default) takes the output of 'abszscoreslice' and "
	  "finds the \"jackknife\" z-score of each slice (over the volume) "
	  "where the current slice is ignored in calculating mean/stddev.\n"
	  "'jackknife' and 'abszscoreslice' produce a 2-D result set, "
	  "whereas every other metric produces a 4-D result set."
	},
	{ OPT_FLAGS_FULL, OPT_VAL_DOUBLE, &opt_brainthresh, 1, "brainthresh",
	  "Only voxels with a value greater than its_brainthresh are used in "
	  "the calculation.  Other voxels will return a metric of 0.  "
	  "Default is minus infinity or thereabouts." },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_fit_method, 1, "fit_method",
	  "Which fitting/detrending method to use.\n"
	  "'mean' (default) simply uses the mean of each voxel's "
	  "time-course.\n"
	  "'linear' does a linear L1 fit of each voxel time-course.\n"
	  "'afni' L1-fits the function used in Robert Cox's "
	  "AFNI 3dDespike program to each voxel's time-course." },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_select[3], 1, "timeselect",
	  "Comma-separated list of timepoints to use (first timepoint is 0).  "
	  "Any timepoint can be a contiguous range, specified as two "
	  "numbers separated by a colon, i.e. 'START:END'.  "
	  "An empty END implies the last timepoint.  "
	  "The default step of 1 (one) in ranges can be changed using "
	  "'START:STEP:END', which is equivalent to "
	  "'START,START+STEP,START+(2*STEP),...,END'." },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_select[0], 1, "xselect",
	  "Just like timeselect, but for the 'x' dimension." },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_select[1], 1, "yselect",
	  "Just like timeselect, but for the 'y' dimension." },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_select[2], 1, "zselect",
	  "Just like timeselect, but for the 'z' dimension." }
    };

    memset(&bdr, '\0', sizeof(bdr));

    argc -= opt_parse(argc, argv, numopts, &opts[0], 0);

    if (opt_version) {
	fprintf(stdout, "%s\n", XMLH_VERSIONSTR);
	exit(0);
    }
    if (strcmp(opt_metric, "diff") != 0 &&
	strcmp(opt_metric, "zscore") != 0 &&
	strcmp(opt_metric, "abszscore") != 0 &&
	strcmp(opt_metric, "afni") != 0 &&
	strcmp(opt_metric, "abszscoreslice") != 0 &&
	strcmp(opt_metric, "jackknife") != 0) {
	fprintf(stderr, "Metric '%s' not supported.  Use --help for usage.\n",
		opt_metric);
	goto FAIL;
    }
    if (strcmp(opt_fit_method, "mean") != 0 &&
	strcmp(opt_fit_method, "linear") != 0 &&
	strcmp(opt_fit_method, "afni") != 0) {
	fprintf(stderr, "Fit method '%s' not supported.  Use --help for usage.\n",
		opt_fit_method);
	goto FAIL;
    }

    if (argc < 3) {
	fprintf(stderr, "Usage: %s [opts] xmlfile outputfile\n", argv[0]);
	fprintf(stderr, "Use the --help option for more help.\n");
	goto FAIL;
    }

    msbfirst = (((char *)&msbfirst)[0] == 0);

    inputfile = argv[1];
    outputarg = argv[2];
    outputbase = (char *)malloc(sizeof(char)*(strlen(outputarg) + 1));
    outputfile = (char *)malloc(sizeof(char)*(strlen(outputarg) + 8));
    outputbxh = (char *)malloc(sizeof(char)*(strlen(outputarg) + 5));
    strcpy(outputbase, outputarg);
    strcpy(outputbxh, outputarg);
    extpos = strrchr(outputbase, '.');
    if (extpos != NULL) {
	*extpos = '\0';
    }
    strcpy(outputfile, outputbase);
    strcat(outputfile, ".nii.gz");
    if (!opt_overwrite) {
	if (stat(outputfile, &statbuf) == 0) {
	    fprintf(stderr, "ERROR: %s: output file '%s' exists.\n", argv[0], outputfile);
	    goto FAIL;
	}
	if (stat(outputbxh, &statbuf) == 0) {
	    fprintf(stderr, "ERROR: %s: output file '%s' exists.\n", argv[0], outputbxh);
	    goto FAIL;
	}
    }

    if (bxh_dataReadFileStart(inputfile, "image", NULL, 4, ordereddimnames, opt_select, &bdr) != 0) {
	fprintf(stderr, "Error preparing data read for '%s'.\n", argv[1]);
	goto FAIL;
    }
    if (bxh_dataReadFinish(&bdr, "float") != 0) {
	fprintf(stderr, "Error finishing data read for '%s'.\n", inputfile);
	goto FAIL;
    }
    dataptr = (float *)bdr.dataptr;
    if (bdr.datarec->numdims != 4) {
	fprintf(stderr, "Data must be 4-dimensional.\n");
	goto FAIL;
    }

#define C2I(x,y,z,t) ((x) + (y)*bdr.pagesizes[0] + (z)*bdr.pagesizes[1] + (t)*bdr.pagesizes[2])
	
    /** First, detrend data (dataptr is overwritten with residuals) **/
    if (strcmp(opt_fit_method, "mean") == 0) {
	size_t xyzind;
	size_t t;
	float mean;
	for (xyzind = 0; xyzind < bdr.pagesizes[2]; xyzind++) {
	    mean = 0;
	    for (t = 0; t < bdr.dimsizes[3]; t++) {
		mean += dataptr[(t*bdr.pagesizes[2])+xyzind];
	    }
	    mean /= bdr.dimsizes[3];
	    for (t = 0; t < bdr.dimsizes[3]; t++) {
		dataptr[(t*bdr.pagesizes[2])+xyzind] -= mean;
	    }
	}
    } else if (strcmp(opt_fit_method, "linear") == 0) {
	integer k, l, m, n;
	integer klmd, klm2d, nklmd, n2d;
	real * qtemplate = NULL;
	real * q = NULL;
	integer kode;
	real toler;
	integer iter;
	real * x = NULL;
	real * res = NULL;
	real error;
	real * cu = NULL;
	integer * iu = NULL;
	integer  * s = NULL;
	size_t xyzind;
	integer kind;

	/* see cl1-552.c for parameter details */
	k = bdr.dimsizes[3]; /* number of points in vector */
	l = 0; /* no equality constraints */
	m = 0; /* no inequality constraints */
	n = 2; /* number of terms in polynomial */
	klmd = k + l + m;
	klm2d = k + l + m + 2;
	nklmd = n + k + l + m;
	n2d = n + 2;
	toler = (real)pow((double)10, (double)(-6.0 * 2.0 / 3.0)); /* tolerance for six digits of precision in a float */
	/* allocate memory for fortran arguments/results */
	qtemplate = (real *)malloc(sizeof(real) * klm2d * n2d);
	q = (real *)malloc(sizeof(real) * klm2d * n2d); /* input matrix */
	x = (real *)malloc(sizeof(real) * n2d); /* output solution */
	res = (real *)malloc(sizeof(real) * klmd); /* output residuals */
	cu = (real *)malloc(sizeof(real) * 2 * nklmd); /* workspace */
	iu = (integer *)malloc(sizeof(real) * 2 * nklmd); /* workspace */
	s = (integer *)malloc(sizeof(real) * klmd); /* workspace */

	/* initialize fortran input arrays */
	/* from the cl1 documentation, initialize Q with: */
	/*             A B */
	/*         Q = C D */
	/*             E F */
	/* (C, D, E, and F are empty in our case) */
	/* don't forget Fortran stores arrays in column-major order */
	memset(qtemplate, '\0', sizeof(real) * klm2d * n2d);
	/* initialize "A" in Q */
	for (kind = 0; kind < k; kind++) {
	    /* a */
	    qtemplate[(0*klm2d)+kind] = 1;
	}
	for (kind = 0; kind < k; kind++) {
	    /* b*t */
	    /* make t "straddle" 0 */
	    qtemplate[(1*klm2d)+kind] = (real)(kind - (0.5 * (k - 1)));
	}
	/* do for each voxel */
	for (xyzind = 0; xyzind < bdr.pagesizes[2]; xyzind++) {
	    int overthresh = 0;
	    /* linear fit for each voxel time-course */
	    if (opt_verbose && xyzind % bdr.pagesizes[1] == 0) {
		fprintf(stderr, "slice %d/%d\r",
			(int)(xyzind/bdr.pagesizes[1]), (int)bdr.dimsizes[2]);
	    }
	    /* some per-call initializations (these are overwritten by cl1_) */
	    kode = 0;
	    error = 0;
	    iter = 10 * (k + l + m); /* number of iterations */
	    /* copy Q template to actual input matrix */
	    memcpy(q, qtemplate, sizeof(real) * klm2d * n2d);
	    /* initialize "B" in Q */
	    for (kind = 0; kind < k; kind++) {
		val = dataptr[(kind*bdr.pagesizes[2])+xyzind];
		q[(n*klm2d)+kind] = val;
		if ((double)val > opt_brainthresh)
		    overthresh = 1;
	    }
	    if (overthresh) {
		cl1_(&k, &l, &m, &n, &klmd, &klm2d, &nklmd, &n2d, q, &kode, &toler, &iter, x, res, &error, cu, iu, s);
	    } else {
		skipped++;
		kode = 0;
		memset(res, '\0', sizeof(real) * klmd);
	    }
	    if (kode == 0) {
		/* success */
		/* x now has params (a, b, c, d1, ..., dn, e1, ..., en),
		 * but we don't care, we just want the residuals */
		for (kind = 0; kind < k; kind++) {
		    dataptr[(kind*bdr.pagesizes[2])+xyzind] = res[kind];
		}
#if 0
		fprintf(stderr, "voxel (%3d,%3d,%3d): fit succeeded\n",
			xyzind%bdr.dimsizes[0],
			(xyzind/bdr.pagesizes[0])%bdr.dimsizes[1],
			(xyzind/bdr.pagesizes[1])%bdr.dimsizes[2]);
#endif
	    } else {
		/* fit failure, just keep the data the way it is */
		/* XXX (is there a better way to flag this condition?) */
		for (kind = 0; kind < k; kind++) {
		    dataptr[(kind*bdr.pagesizes[2])+xyzind] = 0;
		}
#if 1
		fprintf(stderr, "voxel (%3d,%3d,%3d): fit failed\n",
			xyzind%bdr.dimsizes[0],
			(xyzind/bdr.pagesizes[0])%bdr.dimsizes[1],
			(xyzind/bdr.pagesizes[1])%bdr.dimsizes[2]);
#endif
	    }
	}
	free(qtemplate);
	free(q);
	free(x);
	free(res);
	free(cu);
	free(iu);
	free(s);
    } else if (strcmp(opt_fit_method, "afni") == 0) {
	/* This algorithm based on documentation provided at:
	   http://afni.nimh.nih.gov/old/afni/despike.shtml
	   To perform the L1-fit, AFNI uses algorithm TOMS 552
	   available from netlib.org, converted from Fortran
	   using f2c.  This approach is also used here for
	   consistency because if it's good enough for Bob Cox,
	   it's good enough for us. */
	int numwaves;
	integer k, l, m, n;
	integer klmd, klm2d, nklmd, n2d;
	real * qtemplate = NULL;
	real * q = NULL;
	integer kode;
	real toler;
	integer iter;
	real * x = NULL;
	real * res = NULL;
	real error;
	real * cu = NULL;
	integer * iu = NULL;
	integer  * s = NULL;
	size_t xyzind;
	integer kind, nind;

	numwaves = bdr.dimsizes[3] / 30; /* default suggested by Cox */
	if (numwaves == 0) numwaves = 1;

	/* see cl1-552.c for parameter details */
	k = bdr.dimsizes[3]; /* number of points in vector */
	l = 0; /* no equality constraints */
	m = 0; /* no inequality constraints */
	n = 3 + numwaves + numwaves; /* number of terms in polynomial */
	klmd = k + l + m;
	klm2d = k + l + m + 2;
	nklmd = n + k + l + m;
	n2d = n + 2;
	toler = (real)pow((double)10, (double)(-6.0 * 2.0 / 3.0)); /* tolerance for six digits of precision in a float */
	/* allocate memory for fortran arguments/results */
	qtemplate = (real *)malloc(sizeof(real) * klm2d * n2d);
	q = (real *)malloc(sizeof(real) * klm2d * n2d); /* input matrix */
	x = (real *)malloc(sizeof(real) * n2d); /* output solution */
	res = (real *)malloc(sizeof(real) * klmd); /* output residuals */
	cu = (real *)malloc(sizeof(real) * 2 * nklmd); /* workspace */
	iu = (integer *)malloc(sizeof(real) * 2 * nklmd); /* workspace */
	s = (integer *)malloc(sizeof(real) * klmd); /* workspace */

	/* initialize fortran input arrays */
	/* from the cl1 documentation, initialize Q with: */
	/*             A B */
	/*         Q = C D */
	/*             E F */
	/* (C, D, E, and F are empty in our case) */
	/* don't forget Fortran stores arrays in column-major order */
	memset(qtemplate, '\0', sizeof(real) * klm2d * n2d);
	/* initialize "A" in Q */
	for (kind = 0; kind < k; kind++) {
	    /* a */
	    qtemplate[(0*klm2d)+kind] = 1;
	}
	for (kind = 0; kind < k; kind++) {
	    /* b*t */
	    /* make t "straddle" 0 */
	    qtemplate[(1*klm2d)+kind] = (real)(kind - (0.5 * (k - 1)));
	}
	for (kind = 0; kind < k; kind++) {
	    /* c*t*t */
	    /* base t*t on above calculated t */
	    qtemplate[(2*klm2d)+kind] = qtemplate[(1*klm2d)+kind] * qtemplate[(1*klm2d)+kind];
	}
	for (nind = 3; nind < 3 + numwaves; nind++) {
	    /* d_i*sin(2*PI*i*t/T) */
	    int i = nind - 2;
	    for (kind = 0; kind < k; kind++) {
		qtemplate[(nind*klm2d)+kind] = (real)sin(2.0*M_PI*i*kind/k);
	    }
	}
	for (nind = 3 + numwaves; nind < 3 + numwaves + numwaves; nind++) {
	    /* e_i*cos(2*PI*i*t/T) */
	    int i = nind - 3 - numwaves + 1;
	    for (kind = 0; kind < k; kind++) {
		qtemplate[(nind*klm2d)+kind] = (real)cos(2.0*M_PI*i*kind/k);
	    }
	}
	/* (initialize "B" inside loop, below) */

#if 0
	fprintf(stderr, "Q[0..2] = \n");
	for (kind = 0; kind < k; kind++) {
	    for (nind = 0; nind < 3; nind++) {
		fprintf(stderr, "%9g ", qtemplate[(nind*klm2d)+kind]);
	    }
	    fprintf(stderr, "\n");
	}
	fprintf(stderr, "Q[3..3+numwaves-1] = \n");
	for (kind = 0; kind < k; kind++) {
	    for (nind = 3; nind < 3 + numwaves; nind++) {
		fprintf(stderr, "%9g ", qtemplate[(nind*klm2d)+kind]);
	    }
	    fprintf(stderr, "\n");
	}
	fprintf(stderr, "Q[3+numwaves..3+numwaves+numwaves-1] = \n");
	for (kind = 0; kind < k; kind++) {
	    for (nind = 3 + numwaves; nind < 3 + numwaves + numwaves; nind++) {
		fprintf(stderr, "%9g ", qtemplate[(nind*klm2d)+kind]);
	    }
	    fprintf(stderr, "\n");
	}
#endif

	for (xyzind = 0; xyzind < bdr.pagesizes[2]; xyzind++) {
	    int overthresh = 0;
	    if (opt_verbose && xyzind % bdr.pagesizes[1] == 0) {
		fprintf(stderr, "slice %d/%d\r",
			(int)(xyzind/bdr.pagesizes[1]), (int)bdr.dimsizes[2]);
	    }
	    /* some per-call initializations (these are overwritten by cl1_) */
	    kode = 0;
	    error = 0;
	    iter = 10 * (k + l + m); /* number of iterations */
	    /* copy Q template to actual input matrix */
	    memcpy(q, qtemplate, sizeof(real) * klm2d * n2d);
	    /* initialize "B" in Q */
	    for (kind = 0; kind < k; kind++) {
		val = dataptr[(kind*bdr.pagesizes[2])+xyzind];
		q[(n*klm2d)+kind] = val;
		if ((double)val > opt_brainthresh)
		    overthresh = 1;
	    }
#if 0
	    fprintf(stderr, "Q[3+numwaves+numwaves] = \n");
	    for (kind = 0; kind < k; kind++) {
		fprintf(stderr, "%9g ", q[(n*klm2d)+kind]);
	    }
	    fprintf(stderr, "\n");
#endif
	    if (overthresh) {
		cl1_(&k, &l, &m, &n, &klmd, &klm2d, &nklmd, &n2d, q, &kode, &toler, &iter, x, res, &error, cu, iu, s);
	    } else {
		skipped++;
		kode = 0;
		memset(res, '\0', sizeof(real) * klmd);
	    }
	    if (kode == 0) {
		/* success */
		/* x now has params (a, b, c, d1, ..., dn, e1, ..., en),
		 * but we don't care, we just want the residuals */
#if 0
		fprintf(stderr, "x = [");
		for (nind = 0; nind < n; nind++) {
		    fprintf(stderr, "%g ", x[nind]);
		}
		fprintf(stderr, "]\n");
#endif
#if 0
		fprintf(stderr, "[value fit]\n");
		for (kind = 0; kind < k; kind++) {
		    double fitval = x[0];
		    fitval += x[1] * k;
		    fitval += x[2] * k * k;
		    for (nind = 3; nind < 3 + numwaves; nind++) {
			int i = nind - 3 + 1;
			fitval += x[nind] * sin(2*M_PI*i*kind/k);
		    }
		    for (nind = 3 + numwaves; nind < 3 + numwaves + numwaves; nind++) {
			int i = nind - 3 - numwaves + 1;
			fitval += x[nind] * cos(2*M_PI*i*kind/k);
		    }
		    fprintf(stderr, "%9g %9g\n", dataptr[(kind*bdr.pagesizes[2])+xyzind], fitval);
		}
#endif
		for (kind = 0; kind < k; kind++) {
		    dataptr[(kind*bdr.pagesizes[2])+xyzind] = res[kind];
		}
#if 0
		fprintf(stderr, "voxel (%3d,%3d,%3d): fit succeeded\n",
			xyzind%bdr.dimsizes[0],
			(xyzind/bdr.pagesizes[0])%bdr.dimsizes[1],
			(xyzind/bdr.pagesizes[1])%bdr.dimsizes[2]);
#endif
	    } else {
		/* fit failure, just keep the data the way it is */
		/* XXX (is there a better way to flag this condition?) */
		for (kind = 0; kind < k; kind++) {
		    dataptr[(kind*bdr.pagesizes[2])+xyzind] = 0;
		}
#if 1
		fprintf(stderr, "voxel (%3d,%3d,%3d): fit failed\n",
			xyzind%bdr.dimsizes[0],
			(xyzind/bdr.pagesizes[0])%bdr.dimsizes[1],
			(xyzind/bdr.pagesizes[1])%bdr.dimsizes[2]);
#endif
	    }
	}
	free(qtemplate);
	free(q);
	free(x);
	free(res);
	free(cu);
	free(iu);
	free(s);
    } else {
	fprintf(stderr, "Detrend method %s not recognized\n", opt_fit_method);
	exit(-1);
    }

    if (opt_verbose) {
	fprintf(stderr, "Skipped %lu voxels under threshhold %g\n", (unsigned long)skipped, opt_brainthresh);
    }

    /* dataptr now has residuals of the fit */
    /* modify dataptr, if needed, to reflect chosen metric */
    if (strcmp(opt_metric, "diff") == 0) {
	/* do nothing */
    } else if (strcmp(opt_metric, "zscore") == 0 ||
	       strcmp(opt_metric, "abszscore") == 0) {
	size_t xyzind;
	for (xyzind = 0; xyzind < bdr.pagesizes[2]; xyzind++) {
	    size_t t;
	    double var = 0;
	    double stddev = 0;
	    size_t pageind = C2I(xyzind, 0, 0, 0);
	    for (t = 0; t < bdr.dimsizes[3]; t++) {
		double dev = dataptr[(t*bdr.pagesizes[2])+pageind];
		var += (dev * dev);
	    }
	    var /= bdr.dimsizes[3];
	    stddev = sqrt(var);
	    for (t = 0; t < bdr.dimsizes[3]; t++) {
		if (stddev == 0)
		    dataptr[(t*bdr.pagesizes[2])+pageind] = 0;
		else
		    dataptr[(t*bdr.pagesizes[2])+pageind] /= (float)stddev;
	    }
	}
	if (strcmp(opt_metric, "abszscore") == 0) {
	    size_t xyztind;
	    for (xyztind = 0; xyztind < bdr.pagesizes[2]; xyztind++) {
		if (dataptr[xyztind] < 0)
		    dataptr[xyztind] *= -1;
	    }
	}
    } else if (strcmp(opt_metric, "afni") == 0) {
	/* again, from http://afni.nimh.nih.gov/old/afni/despike.shtml */
	float * sorted = NULL;
	float * sorttmp = NULL;
	size_t xyzind;
	double mstddev = 0;
	sorted = (float *)malloc(sizeof(float)*bdr.dimsizes[3]);
	sorttmp = (float *)malloc(sizeof(float)*bdr.dimsizes[3]);
	for (xyzind = 0; xyzind < bdr.pagesizes[2]; xyzind++) {
	    size_t setsize;
	    size_t numt = bdr.dimsizes[3];
	    size_t t;
	    /* convert to absolute values */
	    for (t = 0; t < numt; t++) {
		dataptr[(t*bdr.pagesizes[2])+xyzind] = fabs(dataptr[(t*bdr.pagesizes[2])+xyzind]);
	    }
	    /* find median via mergesort */
	    for (t = 0; t < numt; t++) {
		sorted[t] = dataptr[(t*bdr.pagesizes[2])+xyzind];
	    }
	    setsize = 1;
	    while (setsize < numt) {
		t = 0;
		size_t setstart = 0;
		for (setstart = 0; setstart < numt; setstart += (2 * setsize)) {
		    size_t start1, start2, end1, end2;
		    start1 = setstart;
		    start2 = end1 = start1 + setsize;
		    end2 = start2 + setsize;
		    if (end2 > numt) { end2 = numt; }
		    if (start2 >= numt) { start2 = numt; }
		    if (end1 > numt) { end1 = numt; }
		    while (start1 < end1 && start2 < end2) {
			if (sorted[start1] < sorted[start2])
			    sorttmp[t++] = sorted[start1++];
			else
			    sorttmp[t++] = sorted[start2++];
		    }
		    while (start1 < end1) {
			sorttmp[t++] = sorted[start1++];
		    }
		    while (start2 < end2) {
			sorttmp[t++] = sorted[start2++];
		    }
		}
		{
		    float * swap = sorted;
		    sorted = sorttmp;
		    sorttmp = swap;
		}
		setsize *= 2;
	    }
	    /* median is in sorted[numt/2], compute estimated stddev */
	    mstddev = sqrt(2*M_PI) * sorted[numt/2];
	    for (t = 0; t < numt; t++) {
		if (mstddev == 0)
		    dataptr[(t*bdr.pagesizes[2])+xyzind] = 0;
		else
		    dataptr[(t*bdr.pagesizes[2])+xyzind] /= (float)mstddev;
	    }
	}
	free(sorted);
	free(sorttmp);
    } else if (strcmp(opt_metric, "jackknife") == 0 ||
	       strcmp(opt_metric, "abszscoreslice") == 0) {
	float * aaz = (float *)malloc(sizeof(float)*bdr.dimsizes[2]*bdr.dimsizes[3]);
	size_t t, z, xyind, xyzind;
	/* overwrite dataptr with abs. z-score over each voxel's time course */
	for (xyzind = 0; xyzind < bdr.pagesizes[2]; xyzind++) {
	    size_t t;
	    double var = 0;
	    double stddev = 0;
	    size_t pageind = C2I(xyzind, 0, 0, 0);
	    for (t = 0; t < bdr.dimsizes[3]; t++) {
		double dev = dataptr[(t*bdr.pagesizes[2])+pageind];
		var += (dev * dev);
	    }
	    var /= bdr.dimsizes[3];
	    stddev = sqrt(var);
	    for (t = 0; t < bdr.dimsizes[3]; t++) {
		size_t ind = (t*bdr.pagesizes[2])+pageind;
		if (stddev == 0)
		    dataptr[ind] = MYNAN;
		else
		    dataptr[ind] /= (float)stddev;
		if (dataptr[ind] < 0) {
		    dataptr[ind] *= -1;
		}
	    }
	}
	for (t = 0; t < bdr.dimsizes[3]; t++) {
	    for (z = 0; z < bdr.dimsizes[2]; z++) {
		float val;
		size_t pagebase;
		double mean;
		size_t num;
		pagebase = t*bdr.pagesizes[2] + z*bdr.pagesizes[1] + 0;
		mean = 0;
		num = 0;
		for (xyind = 0; xyind < bdr.pagesizes[1]; xyind++) {
		    val = dataptr[pagebase + xyind];
		    if (!isnan(val)) {
			mean += val;
			num++;
		    }
		}
		if (num) {
		    mean /= num;
		} else {
		    mean = MYNAN;
		}
		aaz[z*bdr.dimsizes[3] + t] = (float)mean;
	    }
	}
#if 0
	for (t = 0; t < bdr.dimsizes[3]; t++) {
	    fprintf(stderr, "t=%lu:", (unsigned long)t);
	    for (z = 0; z < bdr.dimsizes[2]; z++) {
		fprintf(stderr, " %g", (double)aaz[z*bdr.dimsizes[3] + t]);
	    }
	    fprintf(stderr, "\n");
	}
#endif
	if (strcmp(opt_metric, "abszscoreslice") == 0) {
	    for (t = 0; t < bdr.dimsizes[3]; t++) {
		for (z = 0; z < bdr.dimsizes[2]; z++) {
		    if (isnan(aaz[z*bdr.dimsizes[3] + t])) {
			aaz[z*bdr.dimsizes[3] + t] = 0;
		    }
		}
	    }
	    dataptr = aaz;
	} else {
	    /* jackknife (when calculating a given slice's z-score, ignore
	     * current slice in mean/stddev) */
	    float * jkm = (float *)malloc(sizeof(float)*bdr.dimsizes[2]*bdr.dimsizes[3]); /* jackknifed mean */
	    float * jks = (float *)malloc(sizeof(float)*bdr.dimsizes[2]*bdr.dimsizes[3]); /* jackknifed stddev */
	    float * jkz = (float *)malloc(sizeof(float)*bdr.dimsizes[2]*bdr.dimsizes[3]); /* jackknifed z-score */
	    for (t = 0; t < bdr.dimsizes[3]; t++) {
		int ind;
		double sum;
		double cursum;
		double mean;
		double dev2sum;
		double curdev2sum;
		double dev;
		size_t num;
		sum = 0;
		num = 0;
		for (z = 0; z < bdr.dimsizes[2]; z++) {
		    ind = z*bdr.dimsizes[3] + t;
		    if (!isnan(aaz[ind])) {
			sum += aaz[ind];
			num++;
		    }
		}
		if (num) {
		    mean = sum / num;
		    for (z = 0; z < bdr.dimsizes[2]; z++) {
			/* correct each sum individually for jackknifed means */
			ind = z*bdr.dimsizes[3] + t;
			if (!isnan(aaz[ind])) {
			    cursum = sum - aaz[ind];
			    jkm[ind] = (float)(cursum / (num - 1));
			} else {
			    jkm[ind] = MYNAN;
			}
		    }
		    dev2sum = 0;
		    for (z = 0; z < bdr.dimsizes[2]; z++) {
			ind = z*bdr.dimsizes[3] + t;
			if (!isnan(aaz[ind])) {
			    dev = aaz[ind] - mean;
			    dev2sum += dev * dev;
			}
		    }
		    for (z = 0; z < bdr.dimsizes[2]; z++) {
			/* correct each dev2sum individually for jackknifed stddevs */
			/* if n = total number of slices
			 *    m_n     = mean over all n points (mean)
			 *    m_{n-1} = mean over all but current point (in jkm)
			 *    x_n     = value of point we are excluding (in aaz)
			 *    d_n     = sum of sq. deviations over all n (dev2sum)
			 *    d_{n-1} = sum of sq. devs. over all but current point
			 * then
			 *    d_{n-1} = d_n - [ (x_n - m_{n-1}) * (x_n -m_n) ]
			 * If you don't trust the math (which you shouldn't)
			 * I suggest deriving it yourself over lunch.
			 */
			ind = z*bdr.dimsizes[3] + t;
			if (!isnan(aaz[ind]) && !isnan(jkm[ind])) {
			    curdev2sum =
				dev2sum - ((aaz[ind] - jkm[ind]) * (aaz[ind] - mean));
			    jks[ind] = (float)sqrt(curdev2sum / (num - 1));
			} else {
			    jks[ind] = MYNAN;
			}
		    }
		    for (z = 0; z < bdr.dimsizes[2]; z++) {
			ind = z*bdr.dimsizes[3] + t;
			if (jks[ind] == 0 || isnan(jks[ind])) {
			    jkz[ind] = 0;
			} else {
			    jkz[ind] = (aaz[ind] - jkm[ind]) / jks[ind];
			}
			if (jkz[ind] < 0) {
			    jkz[ind] *= -1;
			}
		    }
		} else {
		    for (z = 0; z < bdr.dimsizes[2]; z++) {
			ind = z*bdr.dimsizes[3] + t;
			jkz[ind] = 0;
		    }
		}
	    }
#if 0
	    for (t = 0; t < bdr.dimsizes[3]; t++) {
		fprintf(stderr, "t=%lu:", (unsigned long)t);
		for (z = 0; z < bdr.dimsizes[2]; z++) {
		    fprintf(stderr, " %g", (double)jkz[z*bdr.dimsizes[3] + t]);
		}
		fprintf(stderr, "\n");
	    }
#endif
	    free(aaz);
	    free(jkm);
	    free(jks);
	    dataptr = jkz;
	}
	
	/* modify newdatarec to reflect 2-D data in dataptr */
	bdr.datarec->numdims = 2;
	bxh_datarec_dimdata_free(&bdr.datarec->dimensions[0]);
	bxh_datarec_dimdata_free(&bdr.datarec->dimensions[1]);
	bxh_datarec_dimdata_free(&bdr.datarec->dimensions[2]);
	bxh_datarec_dimdata_free(&bdr.datarec->dimensions[3]);
	memcpy(&bdr.datarec->dimensions[0], &bdr.datarec->dimensions[3], sizeof(bdr.datarec->dimensions[3]));
	memcpy(&bdr.datarec->dimensions[1], &bdr.datarec->dimensions[2], sizeof(bdr.datarec->dimensions[2]));
	bdr.datarec->dimensions[0].type = strdup("t");
	bdr.datarec->dimensions[1].type = strdup("z");
	bdr.dimsizes[0] = bdr.dimsizes[3];
	bdr.dimsizes[1] = bdr.dimsizes[2];
	bdr.pagesizes[0] = bdr.dimsizes[0];
	bdr.pagesizes[1] = bdr.dimsizes[0]*bdr.dimsizes[1];
	bdr.datarec->datasize = bdr.pagesizes[1] * sizeof(float);
    } else {
	fprintf(stderr, "Requested metric %s not recognized\n", opt_metric);
	exit(-1);
    }

    /* write out results */
    free(bdr.datarec->elemtype);
    bdr.datarec->elemtype = strdup("float32");
    if (bxh_datarec_writeToElement(bdr.imagedatap, bdr.datarec) != 0) {
	fprintf(stderr, "Failed writing datarec\n");
	goto FAIL;
    }
    if (bxh_addAutoHistoryEntry(bdr.docp, argv[0], &inputfile, 1) != 0) {
	fprintf(stderr, "Error adding history entry\n");
	goto FAIL;
    }
    writeBXHAndNIIGZ(outputbase, &bdr, dataptr, 0);
    goto EXIT;

  FAIL:
    retval = -1;

  EXIT:
    if (dataptr != bdr.dataptr) {
	free(dataptr); dataptr = NULL;
    }
    bxh_datareaddata_free(&bdr);
    free(outputbxh); outputbxh = NULL;
    free(outputfile); outputfile = NULL;
    free(outputbase); outputbase = NULL;
    return retval;
}

/*
 * $Log: In-line log eliminated on transition to SVN; use svn log instead. $
 * Revision 1.32  2008/07/28 18:36:36  gadde
 * Fix NAN usage for win32
 *
 * Revision 1.31  2008/07/24 20:52:47  gadde
 * Use (0.0/0.0) instead of NAN, as isnan(NAN) does not work correctly in LSB.
 *
 * Revision 1.30  2008/04/04 15:49:34  gadde
 * Add NAN handling for win32
 *
 * Revision 1.29  2008/03/07 23:05:07  gadde
 * Stop using off_t for signed data
 *
 * Revision 1.28  2007/10/04 19:47:43  gadde
 * Possible win32 fix.
 *
 * Revision 1.27  2007/10/03 14:34:28  gadde
 * Ignore values from voxels/slices which don't change at all
 * across the time course.
 *
 * Revision 1.26  2006/06/01 20:16:48  gadde
 * const fixes
 *
 * Revision 1.25  2005/09/20 18:37:52  gadde
 * Updates to versioning, help and documentation, and dependency checking
 *
 * Revision 1.24  2005/09/19 16:31:53  gadde
 * Documentation and help message updates.
 *
 * Revision 1.23  2005/09/14 14:49:24  gadde
 * Type conversion updates to fix win32 warnings
 *
 * Revision 1.22  2004/11/11 14:32:13  gadde
 * Change the dimension names for output to t and z.
 *
 * Revision 1.21  2004/11/10 15:58:10  gadde
 * Fix bug in help.
 *
 * Revision 1.20  2004/11/10 15:47:41  gadde
 * Fix bug in help text.
 *
 * Revision 1.19  2004/09/30 20:07:49  gadde
 * Correct documentation of jackknife algorithm.
 *
 * Revision 1.18  2004/09/30 19:23:21  gadde
 * Replace n^2 jackknife computation with linear-time method.
 *
 * Revision 1.17  2004/06/22 17:52:18  gadde
 * outputfile is relative to current directory.
 *
 * Revision 1.16  2004/06/21 21:41:46  gadde
 * Calculate jackknife mean/stddev with n-1 points (of course).
 *
 * Revision 1.15  2004/06/18 15:21:51  gadde
 * Standardize frag creation (redux)
 *
 * Revision 1.14  2004/06/18 14:10:48  gadde
 * Delete the correct redundant declaration this time.
 *
 * Revision 1.13  2004/06/18 14:09:30  gadde
 * Standardize frag creation
 *
 * Revision 1.12  2004/06/15 20:39:39  gadde
 * Be a little more specific as to pow() arguments.
 *
 * Revision 1.11  2004/06/15 16:16:10  gadde
 * Several -Wall fixes and addition of bxh_datarec_addfrag()
 *
 * Revision 1.10  2004/05/20 15:47:28  gadde
 * don't fail with stddev==0
 *
 * Revision 1.9  2004/05/13 16:45:50  gadde
 * variable name change
 *
 * Revision 1.8  2004/05/12 21:50:29  gadde
 * Use carriage return in diagnostic
 *
 * Revision 1.7  2004/05/12 20:00:18  gadde
 * Add attribution.
 *
 * Revision 1.6  2004/05/12 19:57:04  gadde
 * Add more alternative metrics and fit methods, notably
 * the jackknife method.
 *
 * Revision 1.5  2004/05/10 18:34:57  gadde
 * Actually check the brain threshhold.
 *
 * Revision 1.4  2004/05/06 20:00:15  gadde
 * Standardize output file checking
 *
 * Revision 1.3  2004/05/03 19:42:21  gadde
 * Only print out progress if verbose.
 *
 * Revision 1.2  2004/04/27 14:48:02  gadde
 * Remove dependency on gsl.
 *
 * Revision 1.1  2004/04/27 13:44:01  gadde
 * *** empty log message ***
 *
 *
 */
