static const char rcsid[] = "$Id: bxh_tfilter.c,v 1.34 2009-01-15 20:55:19 gadde Exp $";

/*
 * bxh_tfilter.c --
 * 
 * Run a filter (chebyshev/butterworth) across each voxel's 4th dimension
 * (usually time)
 */

#include <bxh_config.h>

#include <stdio.h>

#if !defined(HAVE_LIBGSL)
int main(int argc, char * argv[])
{
    /* Fallback entry point compiled when HAVE_LIBGSL is not defined:
     * tell the user the tool is unavailable and report failure. */
    static const char nogslmsg[] =
	"Sorry -- to use this program, this package must be compiled with GSL (GNU\nScientific Library) support!\n";

    (void)argc;
    (void)argv;
    fputs(nogslmsg, stderr);
    return -1;
}
#else /* #ifndef HAVE_LIBGSL */

#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#ifndef WIN32
#include <unistd.h>
#endif
#include <math.h>

#include "bxh_utils.h"
#include "bxh_datarec.h"
#include "opts.h"

#include <gsl/gsl_matrix.h>
#include <gsl/gsl_linalg.h>

#ifndef XMLH_VERSIONSTR
#define XMLH_VERSIONSTR "(no version specified)"
#endif

#ifndef M_PI
#define M_PI 3.14159265358979323846264338327950288
#endif

/*
 * main --
 *
 * Parses the command line, reads a 4-D image through the BXH data
 * reader, runs a recursive (Chebyshev, or Butterworth when ripple is 0)
 * filter forwards and then backwards along the 4th dimension of every
 * voxel, and writes the filtered data plus a BXH header describing it.
 *
 * lowpass/highpass are done in one pass over the data.  bandpass is a
 * highpass pass followed by a lowpass pass over the highpass output.
 * bandstop is a lowpass pass and a highpass pass over the original data
 * whose outputs are summed.
 *
 * Returns 0 on success and -1 on any error.  Every error path goes
 * through the FAIL/EXIT labels so that buffers and the output stream
 * are released in one place.
 */
int
main(int argc, char *argv[])
{
    int retval = 0;
    struct stat statbuf;
    char * outputfile = NULL;
    char * outputbxh = NULL;
    FILE * fp = NULL;
    struct bxhdataread bdr;
    float * dataptr = NULL;
    float * results = NULL;
    float * results2 = NULL; /* needed for bandstop */
    char * extpos = NULL;

    /* Per-pass work buffers.  They live at function scope (rather than
     * inside the pass loop) so the cleanup code at EXIT can release
     * them when a pass bails out early. */
    double * a = NULL;  /* input-side (feed-forward) coefficients */
    double * b = NULL;  /* output-side (feedback) coefficients */
    double * ta = NULL; /* scratch copy of a while cascading stages */
    double * tb = NULL; /* scratch copy of b while cascading stages */
    double * vector = NULL;   /* padded time course (filter input) */
    double * rvector = NULL;  /* padded time course (filter output) */
    double * delays = NULL;   /* current filter state */
    double * ssdelays = NULL; /* steady-state filter state for unit input */
    
    int oldargc = argc;

    char * opt_filtertype = NULL;
    double opt_period[2] = { -1, -1 };
    double opt_ripple = 0.0;
    unsigned int opt_order = 6;
    double opt_forcetr = 0;
    int opt_keepdc = 0;
    int opt_overwrite = 0;
    int opt_version = 0;

    double TR = 0;
    double paramfreq[2] = { -1, -1 }; /* will store the cutoff frequencies as a fraction of the sample frequency */
    
    const int numopts = 10;
    opt_data opts[10] = {
	{ 0x0, OPT_VAL_NONE, NULL, 0, "",
	  "Usage:\n"
	  "  bxh_tfilter [opts] input.bxh output.bxh\n\n"
	  "This program runs, on a 4-D data set, a Chebyshev filter "
	  "across each voxel's fourth dimension (e.g. time course) and "
	  "writes the results to output.bxh."
	},
	{ 0x0, OPT_VAL_NONE, NULL, 0, "", "" },
	{ OPT_FLAGS_FULL, OPT_VAL_BOOL, &opt_version, 1, "version",
	  "Print version string and exit." },
	{ OPT_FLAGS_FULL, OPT_VAL_BOOL, &opt_overwrite, 1, "overwrite",
	  "Overwrite existing output files (otherwise error and exit). " },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_filtertype, 1, "filtertype",
	  "This required option chooses the filter type.  Valid choices "
	  "are 'lowpass', 'highpass', 'bandpass', or 'bandstop'.  "
	  "Each filter is parameterized by one or more instances of "
	  "--period.  "
	  "'lowpass' or 'highpass' require one --period option, "
	  "specifying the stop or start frequency respectively.  "
	  "'bandpass' or 'bandstop' require two --period options, "
	  "specifying the start and stop frequencies, in any order "
	  "(larger period/smaller frequency is assumed to be start "
	  "frequency for 'bandpass' and stop frequency for 'bandstop')."
	},
	{ OPT_FLAGS_FULL|OPT_FLAGS_NO_OVERFLOW, OPT_VAL_DOUBLE, &opt_period[0], 2, "period",
	  "This option specifies the frequency parameters for the filter "
	  "in terms of the period (i.e. 1/frequency) in seconds per cycle.  "
	  "May be specified once for 'lowpass' and 'highpass' filter types, "
	  "twice for 'bandpass' and 'bandstop' filter types, "
	  "and must be greater than 0." },
	{ OPT_FLAGS_FULL, OPT_VAL_DOUBLE, &opt_ripple, 1, "ripple",
	  "This option specifies the percent ripple for the Chebyshev "
          "filter.  "
	  "If 0 [zero], which is the default, then the filter is a "
	  "Butterworth filter.  "
	  "Values above roughly 29.29 are rejected because the pole "
	  "calculation is undefined for them." },
	{ OPT_FLAGS_FULL, OPT_VAL_UINT, &opt_order, 1, "order",
	  "Order of the filter.  Default is 6." },
	{ OPT_FLAGS_FULL, OPT_VAL_DOUBLE, &opt_forcetr, 1, "forcetr",
	  "If specified, this value (in seconds) will replace the TR "
	  "specified in the input image file, if any." },
	{ OPT_FLAGS_FULL, OPT_VAL_BOOL, &opt_keepdc, 1, "keepdc",
	  "Keep DC component (mean signal).  Has no effect for lowpass and "
	  "bandstop filter types (which already keep the DC component)." }
	  
    };

    /* zero the reader state so the cleanup at EXIT is safe even when we
     * fail before the reader is started */
    memset(&bdr, '\0', sizeof(bdr));

    argc -= opt_parse(argc, argv, numopts, &opts[0], 0);

    if (opt_version) {
	fprintf(stdout, "%s\n", XMLH_VERSIONSTR);
	exit(0);
    }
    if (argc != 3) {
	fprintf(stderr, "Usage: %s [opts] input.bxh output.bxh\n", argv[0]);
	fprintf(stderr, "Use the --help option for more help.\n");
	goto FAIL;
    }
    if (opt_filtertype == NULL) {
	fprintf(stderr, "Missing --filtertype option!\n");
	fprintf(stderr, "Use the --help option for help.\n");
	goto FAIL;
    }
    if (strcmp(opt_filtertype, "lowpass") == 0 ||
	strcmp(opt_filtertype, "highpass") == 0) {
	/* single-cutoff filters: exactly one positive --period */
	if (opt_period[0] < 0) {
	    fprintf(stderr, "Missing --period option?\n");
	    fprintf(stderr, "Use the --help option for help.\n");
	    goto FAIL;
	} else if (opt_period[0] == 0) {
	    fprintf(stderr, "--period argument must be greater than 0!\n");
	    fprintf(stderr, "Use the --help option for help.\n");
	    goto FAIL;
	}
	if (opt_period[1] > 0) {
	    fprintf(stderr, "Too many --period options for filter '%s'.\n",
		    opt_filtertype);
	    fprintf(stderr, "Use the --help option for help.\n");
	    goto FAIL;
	}
    } else if (strcmp(opt_filtertype, "bandpass") == 0 ||
	       strcmp(opt_filtertype, "bandstop") == 0) {
	/* two-cutoff filters: exactly two positive --period values */
	if (opt_period[0] < 0 || opt_period[1] < 0) {
	    fprintf(stderr, "Not enough --period options?\n");
	    fprintf(stderr, "Use the --help option for help.\n");
	    goto FAIL;
	} else if (opt_period[0] == 0 || opt_period[1] == 0) {
	    fprintf(stderr, "--period arguments must be greater than 0!\n");
	    fprintf(stderr, "Use the --help option for help.\n");
	    goto FAIL;
	}
    } else {
	fprintf(stderr, "Unrecognized filter type %s\n", opt_filtertype);
	fprintf(stderr, "Use the --help option for help.\n");
	goto FAIL;
    }

    if (opt_ripple < 0 || opt_ripple > 100) {
	fprintf(stderr, "--ripple argument must be between 0 and 100, inclusive.\n");
	goto FAIL;
    }
    if (opt_ripple != 0) {
	/* The pole calculation below takes sqrt((inves*inves) - 1).
	 * Evaluate the same expression here, with the same arithmetic,
	 * and refuse ripple values for which it is not a real number
	 * (otherwise every coefficient, and so every output sample,
	 * becomes NaN). */
	double tmpes = 100.0/(100.0-opt_ripple);
	double es = sqrt((tmpes*tmpes) - 1);
	double inves = 1.0/es;
	if (!(((inves*inves) - 1) >= 0)) {
	    fprintf(stderr, "--ripple argument %g is too large; the Chebyshev pole calculation requires a ripple of at most about 29.29 percent.\n", opt_ripple);
	    goto FAIL;
	}
    }

    if (opt_order < 1) {
	/* order 0 would give zero-length state arrays that are then
	 * indexed at [0] by the filter loops */
	fprintf(stderr, "--order argument must be at least 1!\n");
	fprintf(stderr, "Use the --help option for help.\n");
	goto FAIL;
    }
    /* NOTE(review): the cascade below builds order/2 second-order
     * sections, so an odd order contributes no section for its last
     * pole -- confirm whether odd orders should be rejected. */

    if (opt_keepdc) {
	/* lowpass and bandstop already pass the mean, so restoring it
	 * again would be wrong (for bandstop it would be added by the
	 * highpass pass on top of the lowpass pass's copy) */
	if (strcmp(opt_filtertype, "lowpass") == 0 ||
	    strcmp(opt_filtertype, "bandstop") == 0) {
	    opt_keepdc = 0;
	}
    }

    /* Derive the data file name and the BXH header name from the last
     * argument.  The +5 leaves room for appending ".bxh" plus the NUL. */
    outputfile = (char *)malloc(sizeof(char)*(strlen(argv[argc-1]) + 5));
    outputbxh = (char *)malloc(sizeof(char)*(strlen(argv[argc-1]) + 5));
    if (outputfile == NULL || outputbxh == NULL) {
	fprintf(stderr, "Error allocating memory!\n");
	goto FAIL;
    }
    strcpy(outputbxh, argv[argc-1]);
    strcpy(outputfile, argv[argc-1]);
    extpos = strrchr(outputfile, '.');
    if (extpos == NULL) {
	strcat(outputbxh, ".bxh");
    } else if (strcmp(extpos, ".bxh") == 0) {
	/* user specified BXH file as output */
	strcpy(extpos, ".img");
    } else {
	/* user specified data file as output */
	strcpy(outputbxh + (extpos - outputfile), ".bxh");
    }
    if (!opt_overwrite) {    
	if (stat(outputfile, &statbuf) == 0) {
	    fprintf(stderr, "%s: output file '%s' exists.\n", argv[0], outputfile);
	    goto FAIL;
	}
	if (stat(outputbxh, &statbuf) == 0) {
	    fprintf(stderr, "%s: output file '%s' exists.\n", argv[0], outputbxh);
	    goto FAIL;
	}
    }

    {
	BXHElementPtr trelemp = NULL;
	int iteration = 0;

	if (bxh_dataReadFileStart(argv[1], "image", NULL, 0, NULL, NULL, &bdr) != 0) {
	    fprintf(stderr, "Error preparing data read for '%s'.\n", argv[1]);
	    goto FAIL;
	}
	if (bdr.datarec->numdims != 4) {
	    fprintf(stderr, "Input data must have 4 dimensions!\n");
	    goto FAIL;
	}
	if (opt_order > bdr.datarec->dimensions[3].size) {
	    fprintf(stderr, "Error: order of filter (%u) is greater than number of time points (%u)!\n", (unsigned int)opt_order, (unsigned int)bdr.datarec->dimensions[3].size);
	    goto FAIL;
	}

	if (opt_forcetr == 0) {
	    /* prefer the "tr" acquisition element; fall back to the
	     * spacing of the 4th dimension */
	    if ((trelemp = bxh_getChildElement(bdr.acqdatap, "tr")) == NULL ||
		bxh_getElementDoubleValue(trelemp, &TR) != 0) {
		TR = bdr.datarec->dimensions[3].spacing;
	    }
	    if (trelemp) {
		bxh_element_unref(trelemp); trelemp = NULL;
	    }
	    TR /= 1000.0; /* convert to seconds */
	} else {
	    TR = opt_forcetr;
	}
	if (!(TR > 0)) {
	    /* a zero, negative or NaN TR would slip past the period
	     * checks below and yield meaningless cutoff fractions */
	    fprintf(stderr, "Invalid TR (%g seconds); use the --forcetr option to specify a TR greater than 0.\n", TR);
	    goto FAIL;
	}

	if (opt_period[0] <= 2 * TR) {
	    fprintf(stderr, "Specified period %g must be greater than 2*TR (i.e. representing 0.5 times the sample rate).  The TR for this data is %g seconds per image acquisition.\n", opt_period[0], TR);
	    goto FAIL;
	}
	paramfreq[0] = TR / opt_period[0];
	if (opt_period[1] != -1) {
	    if (opt_period[1] <= 2 * TR) {
		fprintf(stderr, "Specified period %g must be greater than 2*TR (i.e. representing 0.5 times the sample rate).  The TR for this data is %g seconds per image acquisition.\n", opt_period[1], TR);
		goto FAIL;
	    }
	    paramfreq[1] = TR / opt_period[1];

	    /* keep the lower cutoff in paramfreq[0] */
	    if (paramfreq[0] > paramfreq[1]) {
		double swap = paramfreq[0];
		paramfreq[0] = paramfreq[1];
		paramfreq[1] = swap;
	    }
	}

	if (bxh_dataReadFinish(&bdr, "float") != 0) {
	    fprintf(stderr, "Error finishing data read for '%s'.\n", argv[1]);
	    goto FAIL;
	}

	/* lowpass and highpass require one iteration;
	 * bandpass and bandstop require two, as well as a combining stage
	 */
	iteration = 0;
	if (strcmp(opt_filtertype, "lowpass") == 0 ||
	    strcmp(opt_filtertype, "highpass") == 0) {
	    iteration = 1;
	}
	for (/* null */; iteration < 2; iteration++) {
	    /* coefficient calculations derived from code at
	     * http://www.dspguide.com/ */
	    int order = (int)opt_order;
	    off_t extrapts = 3*order; /* padding samples on each end */
	    double elema[3]; /* element 2nd-order filter coefficients */
	    double elemb[3]; /* element 2nd-order filter coefficients */
	    int filtnum = 0; /* elementary 2nd-order filter number */
	    int cind = 0;
	    double sa = 0;
	    double sb = 0;
	    double invgain = 0;
	    double freq = 0;
	    int dohigh = 0;

	    off_t volsize = bdr.pagesizes[2];
	    off_t tsize = bdr.dimsizes[3];
	    off_t indxyz = 0;

	    /* a and b start zeroed; ta and tb are fully overwritten
	     * before they are read */
	    a = (double *)calloc(order+3, sizeof(double));
	    b = (double *)calloc(order+3, sizeof(double));
	    ta = (double *)malloc(sizeof(double)*(order+3));
	    tb = (double *)malloc(sizeof(double)*(order+3));
	    if (a == NULL || b == NULL || ta == NULL || tb == NULL) {
		fprintf(stderr, "Error allocating memory!\n");
		goto FAIL;
	    }
	    a[2] = 1;
	    b[2] = 1;

	    /* choose the cutoff and response for this pass */
	    if (strcmp(opt_filtertype, "lowpass") == 0) {
		freq = paramfreq[0];
		dohigh = 0;
	    } else if (strcmp(opt_filtertype, "highpass") == 0) {
		freq = paramfreq[0];
		dohigh = 1;
	    } else if (strcmp(opt_filtertype, "bandstop") == 0) {
		/* lowpass at the lower cutoff, then highpass at the upper */
		freq = paramfreq[iteration];
		if (iteration == 0)
		    dohigh = 0;
		else
		    dohigh = 1;
	    } else if (strcmp(opt_filtertype, "bandpass") == 0) {
		/* highpass at the lower cutoff, then lowpass at the upper */
		freq = paramfreq[iteration];
		if (iteration == 0)
		    dohigh = 1;
		else
		    dohigh = 0;
	    }

	    dataptr = (float *)bdr.dataptr;
	    if (results == NULL) {
		results = (float *)calloc((size_t)(volsize*tsize), sizeof(float));
		if (results == NULL) {
		    fprintf(stderr, "Error allocating memory!\n");
		    goto FAIL;
		}
	    }
	    if (iteration == 1) {
		if (strcmp(opt_filtertype, "bandpass") == 0) {
		    /* need to apply lowpass to highpass results */
		    dataptr = results;
		} else if (strcmp(opt_filtertype, "bandstop") == 0) {
		    /* need a copy of lowpass results to add to highpass results */
		    results2 = results;
		    results = (float *)calloc((size_t)(volsize*tsize), sizeof(float));
		    if (results == NULL) {
			fprintf(stderr, "Error allocating memory!\n");
			goto FAIL;
		    }
		}
	    }

	    /* cascade order/2 second-order sections into a and b */
	    for (filtnum = 1; filtnum <= (order / 2); filtnum++) {
		/* pole location on the unit circle */
		double polereal = -1 * cos((M_PI/(order*2)) + ((filtnum-1)*M_PI/order));
		double poleimag = sin((M_PI/(order*2)) + ((filtnum-1)*M_PI/order));
		if (opt_ripple != 0) {
		    /* warp from a circle to an ellipse */
		    double tmpes = 100.0/(100.0-opt_ripple);
		    double es = sqrt((tmpes*tmpes) - 1);
		    double inves = 1.0/es;
		    double vx = (1.0/order) * log(inves + sqrt((inves*inves) + 1));
		    double kx = (1.0/order) * log(inves + sqrt((inves*inves) - 1));
		    kx = (exp(kx) + exp(-1.0*kx)) / 2.0;
		    polereal *= ((exp(vx) - exp(-1.0*vx)) / 2.0) / kx;
		    poleimag *= ((exp(vx) + exp(-1.0*vx)) / 2.0) / kx;
		}

		{		
		    /* s-domain to z-domain */
		    double t = 2 * tan(0.5);
		    double w = 2 * M_PI * freq;
		    double m = (polereal * polereal) + (poleimag * poleimag);
		    double d = 4 - (4 * polereal * t) + (m * t * t);
		    double x0 = t * t / d;
		    double x1 = 2 * t * t / d;
		    double x2 = t * t / d;
		    double y1 = (8 - (2 * m * t * t)) / d;
		    double y2 = (-4 - (4 * polereal * t) - (m * t * t)) / d;
		
		    /* move the cutoff from the prototype to freq */
		    double k = 0;
		    if (dohigh) {
			k = -1 * cos((w/2.0) + 0.5) / cos((w/2.0) - 0.5);
		    } else {
			k = sin(0.5 - (w/2.0)) / sin(0.5 + (w/2.0));
		    }
		    d = 1 + (y1*k) - (y2*k*k);
		    elema[0] = (x0 - (x1*k) + (x2*k*k)) / d;
		    elema[1] = (-2 * x0*k + x1 + (x1*k*k) - (2*x2*k)) / d;
		    elema[2] = (x0*k*k - x1*k + x2) / d;
		    elemb[0] = 1;
		    elemb[1] = (2*k + y1 + (y1*k*k) - (2*y2*k)) / d;
		    elemb[2] = ((-1*k*k) - (y1*k) + y2) / d;
		    if (dohigh) {
			elema[1] *= -1;
			elemb[1] *= -1;
		    }
		}

		/* multiply this section into the running polynomials */
		for (cind = 0; cind <= order+2; cind++) {
		    ta[cind] = a[cind];
		    tb[cind] = b[cind];
		}
		for (cind = 2; cind <= order+2; cind++) {
		    a[cind] = elema[0]*ta[cind] + elema[1]*ta[cind-1] + elema[2]*ta[cind-2];
		    b[cind] =          tb[cind] - elemb[1]*tb[cind-1] - elemb[2]*tb[cind-2];
		}
	    }
	    
	    /* drop the two leading scratch slots and flip the sign of b */
	    b[2] = 0;
	    for (cind = 0; cind <= order; cind++) {
		a[cind] = a[cind+2];
		b[cind] = -1 * b[cind+2];
	    }

	    /* normalize for unit gain in the passband (alternating signs
	     * for highpass, plain sums for lowpass) */
	    sa = 0;
	    sb = 0;
	    if (dohigh) {
		for (cind = 0; cind <= order; cind++) {
		    if (cind % 2 == 0) {
			sa = sa + a[cind];
			sb = sb + b[cind];
		    } else {
			sa = sa - a[cind];
			sb = sb - b[cind];
		    }
		}
	    } else {
		for (cind = 0; cind <= order; cind++) {
		    sa = sa + a[cind];
		    sb = sb + b[cind];
		}
	    }
	    if (sa == 0)
		invgain = 1;
	    else
		invgain = (1.0 - sb) / sa;

	    for (cind = 0; cind <= order; cind++) {
		a[cind] *= invgain;
	    }

	    b[0] = 1;

#if 0
	    fprintf(stderr, "a: ");
	    for (cind = 0; cind <= order; cind++) {
		fprintf(stderr, " %g", a[cind]);
	    }
	    fprintf(stderr, "\n");
	    fprintf(stderr, "b: ");
	    for (cind = 0; cind <= order; cind++) {
		fprintf(stderr, " %g", b[cind]);
	    }
	    fprintf(stderr, "\n");
#endif

	    /* figure out steady-state values. */
	    ssdelays = (double *)calloc(order, sizeof(double));
	    delays = (double *)malloc(sizeof(double)*order);
	    if (ssdelays == NULL || delays == NULL) {
		fprintf(stderr, "Error allocating memory!\n");
		goto FAIL;
	    }
	    {
		/* Dz = e */
	        gsl_matrix * D = NULL;
		gsl_vector * e = NULL;
		gsl_vector * z = NULL;
		gsl_permutation * p = NULL;
		int s;
		off_t tmpind = 0;

		D = gsl_matrix_calloc(order, order);
		e = gsl_vector_calloc(order);
		z = gsl_vector_calloc(order);
		p = gsl_permutation_alloc (order);
		if (D == NULL || e == NULL || z == NULL || p == NULL) {
		    fprintf(stderr, "Error allocating memory!\n");
		    if (p) gsl_permutation_free(p);
		    if (e) gsl_vector_free(e);
		    if (D) gsl_matrix_free(D);
		    if (z) gsl_vector_free(z);
		    goto FAIL;
		}

		gsl_matrix_set_all(D, 0);
		gsl_matrix_set(D, 0, 0, 1.0 - b[1]);
		for (tmpind = 0; tmpind < order; tmpind++) {
		    gsl_vector_set(z, tmpind, 0);
		}
		for (tmpind = 1; tmpind < order; tmpind++) {
		    gsl_matrix_set(D, tmpind, 0, -1.0 * b[tmpind+1]);
		    gsl_matrix_set(D, tmpind, tmpind, 1);
		    gsl_matrix_set(D, tmpind-1, tmpind, -1);
#if 0
		    fprintf(stderr, "D[%u,%u] = %g\n", (unsigned int)tmpind, (unsigned int)0, (double)gsl_matrix_get(D, tmpind, 0));
		    fprintf(stderr, "D[%u,%u] = %g\n", (unsigned int)tmpind, (unsigned int)tmpind, (double)gsl_matrix_get(D, tmpind, tmpind));
		    fprintf(stderr, "D[%u,%u] = %g\n", (unsigned int)tmpind-1, (unsigned int)tmpind, (double)gsl_matrix_get(D, tmpind-1, tmpind));
#endif
		}
		for (tmpind = 0; tmpind < order; tmpind++) {
		    gsl_vector_set(e, tmpind, a[tmpind+1] + b[tmpind+1]*a[0]);
#if 0
		    fprintf(stderr, "e[%u] = %g\n", (unsigned int)tmpind, (double)gsl_vector_get(e, tmpind));
#endif
		}

#if 0
		fprintf(stderr, "D:\n");
		for (cind = 0; cind < order; cind++) {
		    off_t cind2;
		    for (cind2 = 0; cind2 < order; cind2++) {
			fprintf(stderr, " %10g", gsl_matrix_get(D, cind, cind2));
		    }
		    fprintf(stderr, "\n");
		}
		fprintf(stderr, "\n");
		fprintf(stderr, "e: ");
		for (cind = 0; cind < order; cind++) {
		    fprintf(stderr, " %g", gsl_vector_get(e, cind));
		}
		fprintf(stderr, "\n");
#endif

		gsl_linalg_LU_decomp (D, p, &s);
		gsl_linalg_LU_solve (D, p, e, z);

		for (tmpind = 0; tmpind < order; tmpind++) {
		    ssdelays[tmpind] = gsl_vector_get(z, tmpind);
		}
#if 0
		fprintf(stderr, "ssdelays: ");
		for (cind = 0; cind < order; cind++) {
		    fprintf(stderr, " %g", ssdelays[cind]);
		}
		fprintf(stderr, "\n");
#endif

		gsl_permutation_free(p);
		gsl_vector_free(e);
		gsl_matrix_free(D);
		gsl_vector_free(z);
	    }

	    vector = (double *)malloc(sizeof(double) * (tsize + 2*extrapts));
	    rvector = (double *)malloc(sizeof(double) * (tsize + 2*extrapts));
	    if (vector == NULL || rvector == NULL) {
		fprintf(stderr, "Error allocating memory!\n");
		goto FAIL;
	    }

	    for (indxyz = 0; indxyz < volsize; indxyz++) {
		int indt = 0;
		int sample0ind = extrapts; /* index of first sample */
		int samplenind = extrapts + tsize - 1; /* index of last sample */
		int curind = 0;
		int refldir = 0;
		off_t reflpos = 0;
		double scalefactor = 0;
		double mean = 0;

		/* store the time course in a vector for fast access;
		 *  first sample occurs at index 3*order
		 *  last sample occurs at index 3*order + tsize - 1
		 */
		for (indt = 0; indt < tsize; indt++) {
		    vector[sample0ind + indt] = (double)dataptr[indt*volsize + indxyz];
		}

		/* calculate the mean for use later (running average) */
		mean = 0;
		for (indt = 0; indt < tsize; indt++) {
		    mean += (vector[sample0ind + indt] - mean) / (indt+1);
		}

		/* "reflect/invert" the signal to provide earlier values,
		 * up to 3*order points are extrapolated using point-symmetric
		 * padding.  If number of samples is less than 3*order,
		 * then use as much as is available, and then reflect/invert
		 * again to get more points.
		 * The filter state itself is initialized from the
		 * steady-state calculation.
		 */
		refldir = 1;
		reflpos = sample0ind + 1;
		for (curind = sample0ind - 1; curind >= 0; curind--) {
		    if (refldir == 1) {
			vector[curind] = (2 * vector[sample0ind]) - vector[reflpos];
			if (reflpos == samplenind)
			    refldir *= -1;
		    } else {
			vector[curind] = vector[reflpos];
			if (reflpos == sample0ind)
			    refldir *= -1;
		    }
		    reflpos += refldir;
		}
		/* "reflect/invert" the new signal to provide later values,
		 * using same procedure as above. */
		refldir = -1;
		reflpos = samplenind - 1;
		for (curind = samplenind + 1; curind <= samplenind + extrapts; curind++) {
		    if (refldir == 1) {
			vector[curind] = vector[reflpos];
			if (reflpos == samplenind)
			    refldir *= -1;
		    } else {
			vector[curind] = (2 * vector[samplenind]) - vector[reflpos];
			if (reflpos == sample0ind)
			    refldir *= -1;
		    }
		    reflpos += refldir;
		}
		for (curind = 0; curind <= samplenind + extrapts; curind++) {
		    rvector[curind] = vector[curind];
		}

#if 0
		if (indxyz == 31*64 + 31) {
		    fprintf(stderr, "vector:    ");
		    for (indt = 0; indt <= extrapts && indt < tsize; indt++) {
			fprintf(stderr, " %7.2f", vector[sample0ind + indt]);
		    }
		    fprintf(stderr, "\n");
		    fprintf(stderr, "vectorinv: ");
		    for (curind = sample0ind; curind >= 0; curind--) {
			fprintf(stderr, " %7.2f", vector[curind]);
		    }
		    fprintf(stderr, "\n");
		}
#endif

		/* scale steady-state delays */
		scalefactor = vector[0];
		for (curind = 0; curind < order; curind++) {
		    delays[curind] = scalefactor * ssdelays[curind];
		}
		/* filter once using transposed direct form II method */
		for (curind = 0; curind <= samplenind + extrapts; curind++) {
		    off_t ordernum = 0;
		    double curinput = vector[curind];
		    double curoutput = delays[0] + (curinput * a[0]);
		    rvector[curind] = curoutput;
		    /* calculate values of delays for next timestep */
		    for (ordernum = 1; ordernum < order; ordernum++) {
			delays[ordernum-1] =
			    delays[ordernum] +
			    (curinput * a[ordernum]) +
			    (curoutput * b[ordernum]);
		    }
		    if (order > 1) {
			/* last component does not have an input delay */
			delays[order-1] =
			    (curinput * a[order]) +
			    (curoutput * b[order]);
		    }
		}

		/* write the filtered time course back into vector */
		for (curind = sample0ind; curind <= samplenind + extrapts; curind++) {
		    vector[curind] = rvector[curind];
		}

#if 0
		if (indxyz == 31*64 + 31) {
		    fprintf(stderr, "rvector:    ");
		    for (indt = 0; indt <= extrapts && indt < tsize; indt++) {
			fprintf(stderr, " %7.2f", rvector[samplenind - indt]);
		    }
		    fprintf(stderr, "\n");
		    fprintf(stderr, "rvectorinv: ");
		    for (curind = samplenind; curind <= samplenind + extrapts; curind++) {
			fprintf(stderr, " %7.2f", rvector[curind]);
		    }
		    fprintf(stderr, "\n");
		}
#endif

		/* scale steady-state delays */
		scalefactor = vector[samplenind + extrapts];
		for (curind = 0; curind < order; curind++) {
		    delays[curind] = scalefactor * ssdelays[curind];
		}
		/* now filter again, backwards
		 * (probably don't need to go down to 0) */
		for (curind = samplenind + extrapts; curind >= 0; curind--) {
		    off_t ordernum = 0;
		    double curinput = vector[curind];
		    double curoutput = delays[0] + (curinput * a[0]);
		    rvector[curind] = curoutput;
		    /* calculate values of delays for next timestep */
		    for (ordernum = 1; ordernum < order; ordernum++) {
			delays[ordernum-1] =
			    delays[ordernum] +
			    (curinput * a[ordernum]) +
			    (curoutput * b[ordernum]);
		    }
		    if (order > 1) {
			/* last component does not have an input delay */
			delays[order-1] =
			    (curinput * a[order]) +
			    (curoutput * b[order]);
		    }
		}

		if (opt_keepdc) {
		    double newmean = 0;
		    /* add back original mean */
		    for (indt = 0; indt < tsize; indt++) {
			newmean += (rvector[sample0ind + indt] - newmean) / (indt + 1);
		    }
		    for (indt = 0; indt < tsize; indt++) {
			rvector[sample0ind + indt] += (mean - newmean);
		    }
		}

		/* put results vector data in the right place */
		for (indt = 0; indt < tsize; indt++) {
		    results[indt*volsize + indxyz] = (float)rvector[sample0ind + indt];
		}
	    }

	    /* bandstop: add the saved lowpass output to the highpass output */
	    if (strcmp(opt_filtertype, "bandstop") == 0 && results2 != NULL) {
		size_t numelems = volsize*tsize;
		size_t indxyzt = 0;
		for (indxyzt = 0; indxyzt < numelems; indxyzt++) {
		    results[indxyzt] += results2[indxyzt];
		}
	    }

	    /* release this pass's buffers; reset the pointers so the
	     * cleanup at EXIT does not free them a second time */
	    free(vector); vector = NULL;
	    free(rvector); rvector = NULL;
	    free(delays); delays = NULL;
	    free(ssdelays); ssdelays = NULL;
	    free(a); a = NULL;
	    free(b); b = NULL;
	    free(ta); ta = NULL;
	    free(tb); tb = NULL;
	}
    }

    /* write out results */
    if ((fp = fopen(outputfile, "wb")) == NULL) {
	fprintf(stderr, "Error opening file %s\n", outputfile);
	goto FAIL;
    }
    if (fwrite(results, sizeof(float)*bdr.pagesizes[bdr.datarec->numdims-1], 1, fp) != 1) {
	fprintf(stderr, "Error writing to file %s\n", outputfile);
	goto FAIL;
    }
    /* close now so that a failed flush is reported before the header
     * that refers to this data is written */
    if (fclose(fp) != 0) {
	fp = NULL;
	fprintf(stderr, "Error writing to file %s\n", outputfile);
	goto FAIL;
    }
    fp = NULL;

    /* create BXH file for output */
    bxh_datarec_frags_free(bdr.datarec);
    free(bdr.datarec->elemtype);
    bdr.datarec->elemtype = strdup("float32");
    bxh_datarec_addfrag(bdr.datarec, outputfile, 0, sizeof(float)*bdr.pagesizes[bdr.datarec->numdims-1], outputbxh, 1);
    if (bxh_datarec_writeToElement(bdr.imagedatap, bdr.datarec) != 0) {
	fprintf(stderr, "Failed writing datarec\n");
	goto FAIL;
    }
    if (bxh_addAutoHistoryEntry(bdr.docp, argv[0], (const char **)&argv[1], oldargc-1) != 0) {
	fprintf(stderr, "Error adding history entry\n");
	goto FAIL;
    }
    if (bxh_writeFile(bdr.docp, outputbxh) != 0) {
	fprintf(stderr, "Error writing output file %s\n", outputbxh);
	goto FAIL;
    }
    goto EXIT;
    
  FAIL:
    retval = -1;

  EXIT:
    if (fp) {
	fclose(fp); fp = NULL;
    }
    bxh_datareaddata_free(&bdr);
    free(vector);
    free(rvector);
    free(delays);
    free(ssdelays);
    free(a);
    free(b);
    free(ta);
    free(tb);
    free(results);
    free(results2);
    free(outputbxh); outputbxh = NULL;
    free(outputfile); outputfile = NULL;
    return retval;
}

#endif /* #ifndef HAVE_LIBGSL #else */

/*
 * $Log: In-line log eliminated on transition to SVN; use svn log instead. $
 * Revision 1.33  2008/01/28 17:01:24  gadde
 * off_t is unsigned in LSB???  Oh well...
 *
 * Revision 1.32  2007/03/20 17:43:06  gadde
 * Allow TR from dimt->spacing if not anywhere else
 *
 * Revision 1.31  2006/07/13 16:10:57  gadde
 * Win32 fixes
 *
 * Revision 1.30  2006/06/05 20:22:35  gadde
 * Fix delay calculation.
 *
 * Revision 1.29  2006/05/04 16:54:31  gadde
 * Use float instead of double for storing data.
 *
 * Revision 1.28  2005/09/20 18:37:55  gadde
 * Updates to versioning, help and documentation, and dependency checking
 *
 * Revision 1.27  2005/09/19 16:31:57  gadde
 * Documentation and help message updates.
 *
 * Revision 1.26  2005/09/14 14:49:30  gadde
 * Type conversion updates to fix win32 warnings
 *
 * Revision 1.25  2005/09/09 20:11:01  gadde
 * Add --keepdc option to bxh_epochavg, and call it from bxh_eventstats
 *
 * Revision 1.24  2005/06/15 17:23:16  gadde
 * Don't declare variables in middle of block.
 *
 * Revision 1.23  2005/06/15 17:19:43  gadde
 * Add definition for M_PI if it doesn't exist.
 *
 * Revision 1.22  2005/05/30 19:07:41  gadde
 * Add --overwrite option.
 *
 * Revision 1.21  2005/05/30 18:40:09  gadde
 * Lots of updates:
 *  Now requires GSL
 *  Better computation of steady-state delays
 *  Fixed scaling of steady-state delays
 *  Add support for bandstop/bandpass (as combinations of lowpass/highpass)
 *  #ifdef out several debugging messages
 *
 * Revision 1.20  2005/05/26 21:56:46  gadde
 * Use "transposed direct form II method".
 *
 * Revision 1.19  2005/05/26 19:42:03  gadde
 * First try at implementing transposed direct form II method.
 *
 * Revision 1.18  2005/05/26 19:03:38  gadde
 * Specify frequencies as period.
 * Create extra points on either end of vector using point-symmetric padding.
 * Try to find steady-state parameters (hack!)
 *
 * Revision 1.17  2005/05/25 19:31:24  gadde
 * Fix indexing.
 *
 * Revision 1.16  2005/05/25 19:28:39  gadde
 * Some more argument checking.
 * Copy voxel time courses into vector for fast access.
 * Use "reflection method" to generate extra points on either end of vector.
 *
 * Revision 1.15  2005/05/25 18:02:11  gadde
 * Make --frequency actually mean what it says.
 * Also do some more argument checking.
 *
 * Revision 1.14  2005/05/25 17:21:22  gadde
 * Fix out-of-date help message.
 *
 * Revision 1.13  2005/05/25 17:17:43  gadde
 * Remove diagnostic messages.
 * Don't support bandpass and bandstop yet.
 * Start out[0] with initial value of in[0], and use that value as padding
 * for both input and output arrays.
 *
 * Revision 1.12  2005/05/24 20:38:18  gadde
 * Clarify parts of algorithm.
 *
 * Revision 1.11  2005/05/24 20:01:29  gadde
 * Be more careful with data types.
 *
 * Revision 1.10  2005/05/24 19:59:55  gadde
 * More diagnostics.
 *
 * Revision 1.9  2005/05/24 19:58:10  gadde
 * More diagnostics.
 *
 * Revision 1.8  2005/05/24 19:47:55  gadde
 * Update diagnostic.
 *
 * Revision 1.7  2005/05/24 19:45:49  gadde
 * Diagnostic output.
 *
 * Revision 1.6  2005/05/24 16:39:24  gadde
 * Fix gain calculation at endpoints.
 *
 * Revision 1.5  2005/05/23 20:18:22  gadde
 * Some more algorithmic bug fixes.
 *
 * Revision 1.4  2005/05/23 18:58:28  gadde
 * Fix some bugs in the algorithm.
 *
 * Revision 1.3  2005/05/23 18:21:25  gadde
 * Fix some filter-order indexing problems.
 *
 * Revision 1.2  2005/05/23 17:58:50  gadde
 * Fix memory allocation bug.
 *
 * Revision 1.1  2005/05/23 16:08:10  gadde
 * Initial import.
 *
 */
