static const char rcsid[] = "$Id: bxh_brainmask.c,v 1.18 2009-01-15 20:55:19 gadde Exp $";

/*
 * bxh_brainmask.c --
 * 
 *  For a given 3-D or 4-D data set, make a 3-D brain mask.
 *  
 */

#include <bxh_config.h>

#include <stdio.h>

#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#ifndef WIN32
#include <unistd.h>
#endif
#include <math.h>

#ifdef HAVE_LIBGSL
#include <gsl/gsl_multifit.h>
#endif

#include "bxh_niftilib.h"
#include "bxh_datarec.h"
#include "opts.h"

#ifndef XMLH_VERSIONSTR
#define XMLH_VERSIONSTR "(no version specified)"
#endif

/*
 * mergesort_float --
 *
 *  Return a newly allocated copy of data[0..numitems-1] sorted in
 *  ascending order, using a bottom-up (iterative) merge sort.  The
 *  input array is not modified.
 *
 *  data      array of numitems floats to sort (only read).
 *  numitems  number of elements in data; may be 0.
 *
 *  Returns a malloc()ed array that the caller must free(), or NULL if
 *  memory could not be allocated.
 */
float *
mergesort_float(float * data, size_t numitems)
{
    float * tempdata = NULL;
    float * sorteddata = NULL;
    size_t setsize = 1;
    /* always allocate at least one element so that a NULL return
     * unambiguously means "allocation failed", even for numitems == 0 */
    size_t allocitems = (numitems > 0) ? numitems : 1;

    /* guard against overflow of the byte count passed to malloc() */
    if (allocitems > ((size_t)-1) / sizeof(float)) {
	return NULL;
    }
    tempdata = (float *)malloc(sizeof(float)*allocitems);
    sorteddata = (float *)malloc(sizeof(float)*allocitems);
    if (tempdata == NULL || sorteddata == NULL) {
	free(tempdata);
	free(sorteddata);
	return NULL;
    }
    if (numitems > 0) {
	memcpy(sorteddata, data, sizeof(float)*numitems);
    }
    /* each pass merges adjacent sorted runs of length setsize from
     * sorteddata into tempdata, then the two buffers swap roles */
    for (setsize = 1; setsize < numitems; setsize *= 2) {
	size_t setstart = 0;
	size_t tempind = 0;
	for (setstart = 0;
	     setstart < numitems;
	     setstart += (2 * setsize)) {
	    size_t start1 = setstart;
	    size_t start2 = start1 + setsize;
	    size_t end1 = start2;
	    size_t end2 = start2 + setsize;
	    /* clip both runs to the end of the array (the last run, or
	     * last pair of runs, may be short or missing) */
	    if (end2 > numitems) { end2 = numitems; }
	    if (start2 >= numitems) { start2 = numitems; }
	    if (end1 > numitems) { end1 = numitems; }
	    while (start1 < end1 && start2 < end2) {
		if (sorteddata[start1] < sorteddata[start2]) {
		    tempdata[tempind++] = sorteddata[start1++];
		} else {
		    tempdata[tempind++] = sorteddata[start2++];
		}
	    }
	    /* copy whatever is left over in either run */
	    while (start1 < end1) {
		tempdata[tempind++] = sorteddata[start1++];
	    }
	    while (start2 < end2) {
		tempdata[tempind++] = sorteddata[start2++];
	    }
	}
	{
	    float * swap = tempdata;
	    tempdata = sorteddata;
	    sorteddata = swap;
	}
    }
    free(tempdata);
    return sorteddata;
}

/*
 * main --
 *
 *  Command-line entry point:
 *    bxh_brainmask [opts] inputfile outputfile
 *
 *  Parses the options, derives the output file names from the second
 *  positional argument, reads the selected input data as floats, builds
 *  a one-byte-per-voxel 0/1 mask over the x/y/z volume using the chosen
 *  method ('rank', 'threshold' or, when built with HAVE_LIBGSL,
 *  'localmin'), and hands the mask to writeBXHAndNIIGZ().
 *
 *  Returns 0 on success and -1 on failure.  All failure paths go
 *  through the FAIL/EXIT labels so that allocated resources are
 *  released.
 */
int
main(int argc, char *argv[])
{
    int retval = 0;
    struct stat statbuf;
    const char * inputfile = NULL;
    char * outputbase = NULL;
    char * outputbxh = NULL;
    char * outputfile = NULL;
    char * outputfilegz = NULL;
    const char * opt_select[] = { ":", ":", ":", ":" };
    char * opt_method = NULL;
    char * opt_filterthresh = NULL;
    char * opt_filterrank = NULL;
    int opt_debug = 0;
    unsigned int opt_filterorder = 5;
    int opt_overwrite = 0;
    int opt_version = 0;

    const char * ordereddimnames[] = { "x", "y", "z", "t" };
    struct bxhdataread bdr;
    size_t * dimsizes = NULL;
    size_t * pagesizes = NULL;
    float * dataptr = NULL;
    BXHDocPtr docp = NULL;
    bxhrawdatarec * outdatarec = NULL;
    bxhrawdatarec * tmpdatarec = NULL;
    /* mask buffer; declared at function scope so EXIT can release it */
    char * results = NULL;

    const int numopts = 13;
    opt_data opts[13] = {
	{ 0x0, OPT_VAL_NONE, NULL, 0, "",
	  "Usage:\n"
	  "  bxh_brainmask [opts] inputfile outputfile\n\n"
	  "This program will attempt to create a simple (thresholded) "
	  "brain mask given a BXH- or XCEDE-wrapped input image.  "
	  "Output is also a BXH- or XCEDE-wrapped input image.  "
	  "Calculation of the threshold is modified using various options." },
	{ 0x0, OPT_VAL_NONE, NULL, 0, "", "" },
	{ OPT_FLAGS_FULL, OPT_VAL_BOOL, &opt_version, 1, "version",
	  "Print version string and exit." },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_select[3], 1, "timeselect",
	  "Comma-separated list of timepoints to use (first timepoint is 0).  "
	  "Any timepoint can be a contiguous range, specified as two "
	  "numbers separated by a colon, i.e. 'START:END'.  "
	  "An empty END implies the last timepoint.  "
	  "The default step of 1 (one) in ranges can be changed using "
	  "'START:STEP:END', which is equivalent to "
	  "'START,START+STEP,START+(2*STEP),...,END'.  "
	  "Default is all timepoints (:)." },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_select[0], 1, "xselect",
	  "Just like timeselect, but for the 'x' dimension." },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_select[1], 1, "yselect",
	  "Just like timeselect, but for the 'y' dimension." },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_select[2], 1, "zselect",
	  "Just like timeselect, but for the 'z' dimension." },
	{ OPT_FLAGS_FULL, OPT_VAL_BOOL, &opt_overwrite, 1, "overwrite",
	  "Overwrite existing output files (otherwise error and exit). " },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_method, 1, "method",
	  "Method to use for creating the brain mask.\n"
	  "'threshold' marks those voxels whose mean value over time are not "
	  "less than a given threshold (provided by --filterthresh).\n"
	  "'rank' chooses the largest threshold that allows at "
	  "least the n highest-valued voxels (as determined by the voxel's "
	  "mean value over time) where n is specified by --filterrank.\n"
#ifdef HAVE_LIBGSL
	  "'localmin' fits a nth-order polynomial (order optionally specified "
	  "by --filterorder) to an intensity histogram of the minimum value "
	  "of each voxel over time, and chooses the first local minimum "
	  "(disregarding the first histogram bucket) as the "
	  "threshold.  This method assumes the data follows an intensity "
	  "distribution with at least two \"humps\", the first (lower) of "
	  "which reflects noise.\n"
#endif
	  "Default is 'rank'." },
	{ OPT_FLAGS_FULL, OPT_VAL_UINT, &opt_filterorder, 1, "filterorder",
	  "Order of the polynomial used for --method localmin.  "
	  "Default is 5." },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_filterthresh, 1, "filterthresh",
	  "Threshold used for --method threshold.  If value ends with "
	  "the percent sign (%), then this is taken as a percent of "
	  "maximum intensity.  Default is '50%'." },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_filterrank, 1, "filterrank",
	  "Threshold used for --method rank.  If value ends with "
	  "the percent sign (%), then this is taken as a percent of "
	  "the number of total voxels.  Default is '20%'." },
	{ OPT_FLAGS_FULL, OPT_VAL_BOOL, &opt_debug, 1, "debug",
	  "Print out debugging messages." }
    };

    /* zero bdr up front so the cleanup at EXIT is safe on early failure */
    memset(&bdr, '\0', sizeof(bdr));

    argc -= opt_parse(argc, argv, numopts, &opts[0], 0);

    /* fill in defaults for string options that were not given */
    if (opt_method == NULL) {
	opt_method = strdup("rank");
    }
    if (opt_filterthresh == NULL) {
	opt_filterthresh = strdup("50%");
    }
    if (opt_filterrank == NULL) {
	opt_filterrank = strdup("20%");
    }

    if (opt_version) {
	fprintf(stdout, "%s\n", XMLH_VERSIONSTR);
	exit(0);
    }
    if (argc != 3) {
	fprintf(stderr, "Usage: %s xmlfile outputfile\n", argv[0]);
	fprintf(stderr, "Use the --help option for more help.\n");
	goto FAIL;
    }

    inputfile = argv[1];

    /* Build the output names from argv[2]:
     *   outputbxh    = argv[2] as given
     *   outputbase   = argv[2] with its last ".ext" (if any) removed
     *   outputfile   = outputbase + ".nii"
     *   outputfilegz = outputbase + ".nii.gz"
     * Buffer sizes cover the longest case (no extension to strip). */
    outputbase = (char *)malloc(sizeof(char)*(strlen(argv[2]) + 1));
    outputbxh = (char *)malloc(sizeof(char)*(strlen(argv[2]) + 5));
    outputfile = (char *)malloc(sizeof(char)*(strlen(argv[2]) + 5));
    outputfilegz = (char *)malloc(sizeof(char)*(strlen(argv[2]) + 8));
    if (outputbase == NULL || outputbxh == NULL ||
	outputfile == NULL || outputfilegz == NULL) {
	fprintf(stderr, "%s: out of memory.\n", argv[0]);
	goto FAIL;
    }
    strcpy(outputbxh, argv[2]);
    {
	char * extpos = NULL;
	extpos = strrchr(outputbxh, '.');
	if (extpos == NULL) {
	    /* no extension on output */
	    strcpy(outputbase, outputbxh);
	    strcpy(outputfile, outputbxh);
	    strcpy(outputfilegz, outputbxh);
	    strcat(outputfile, ".nii");
	    strcat(outputfilegz, ".nii.gz");
	} else {
	    size_t baselen = (extpos - outputbxh);
	    strncpy(outputbase, outputbxh, baselen);
	    strncpy(outputfile, outputbxh, baselen);
	    strncpy(outputfilegz, outputbxh, baselen);
	    /* strncpy did not terminate; the strcpy calls below do so
	     * for outputfile and outputfilegz */
	    outputbase[baselen] = '\0';
	    strcpy(outputfile + baselen, ".nii");
	    strcpy(outputfilegz + baselen, ".nii.gz");
	}
    }
    if (!opt_overwrite) {
	if (stat(outputfilegz, &statbuf) == 0) {
	    fprintf(stderr, "%s: output file '%s' exists.\n", argv[0], outputfilegz);
	    goto FAIL;
	}
	if (stat(outputbxh, &statbuf) == 0) {
	    fprintf(stderr, "%s: output file '%s' exists.\n", argv[0], outputbxh);
	    goto FAIL;
	}
    }

    if (bxh_dataReadFileStart(inputfile, "image", NULL, 4, ordereddimnames, opt_select, &bdr) != 0) {
	fprintf(stderr, "Error preparing data read for '%s'.\n", inputfile);
	goto FAIL;
    }
    if (bdr.datarec->numdims < 3) {
	fprintf(stderr, "Data must be at least 3-dimensional.\n");
	goto FAIL;
    }

    if (bxh_dataReadFinish(&bdr, "float") != 0) {
	fprintf(stderr, "Error finishing data read for '%s'.\n", inputfile);
	goto FAIL;
    }

    dataptr = (float *)bdr.dataptr;
    dimsizes = bdr.dimsizes;
    pagesizes = bdr.pagesizes;
    docp = bdr.docp;
    if (bdr.datarec->numdims == 3) {
	/* treat 3-D data as 4-D with a single time point; keep the old
	 * pointers intact in bdr if realloc() fails so EXIT can free them */
	size_t * newdimsizes = NULL;
	size_t * newpagesizes = NULL;
	newdimsizes = (size_t *)realloc(dimsizes, sizeof(size_t)*4);
	if (newdimsizes == NULL) {
	    fprintf(stderr, "%s: out of memory.\n", argv[0]);
	    goto FAIL;
	}
	dimsizes = bdr.dimsizes = newdimsizes;
	newpagesizes = (size_t *)realloc(pagesizes, sizeof(size_t)*4);
	if (newpagesizes == NULL) {
	    fprintf(stderr, "%s: out of memory.\n", argv[0]);
	    goto FAIL;
	}
	pagesizes = bdr.pagesizes = newpagesizes;
	dimsizes[3] = 1;
	pagesizes[3] = pagesizes[2];
    }

    if (dimsizes[3] == 0) {
	fprintf(stderr, "Number of time points must be greater than 0!\n");
	goto FAIL;
    }
    if (pagesizes[2] == 0) {
	/* every method below indexes into per-voxel arrays */
	fprintf(stderr, "Input data contains no voxels!\n");
	goto FAIL;
    }

    /*** Do the dirty work here ***/
    {
	/* one byte per voxel of the x/y/z volume: 1 = in mask, 0 = not */
	results = (char *)malloc(sizeof(char)*pagesizes[2]);
	if (results == NULL) {
	    fprintf(stderr, "%s: out of memory.\n", argv[0]);
	    goto FAIL;
	}
	if (strcmp(opt_method, "rank") == 0) {
	    double filterrank = 0;
	    size_t numranked = 0;
	    size_t indxyzt = 0;
	    size_t indxyz = 0;
	    size_t indt = 0;
	    char * endptr = NULL;
	    float threshold = 0;
	    float * means = NULL;
	    size_t volsize = pagesizes[2];
	    filterrank = strtod(opt_filterrank, &endptr);
	    if (endptr == opt_filterrank ||
		(*endptr != '\0' && *endptr != '%') ||
		!(filterrank >= 0)) {
		fprintf(stderr, "Bad rank value '%s'!\n", opt_filterrank);
		goto FAIL;
	    }
	    if (*endptr == '%') {
		/* percent of the number of voxels being ranked */
		filterrank = volsize * (filterrank / 100.0);
	    }
	    /* number of highest-valued voxels to keep, clipped to the
	     * number of voxels available */
	    if (filterrank >= (double)volsize) {
		numranked = volsize;
	    } else {
		numranked = (size_t)filterrank;
	    }
	    /* calculate means */
	    means = (float *)malloc(sizeof(float)*volsize);
	    if (means == NULL) {
		fprintf(stderr, "%s: out of memory.\n", argv[0]);
		goto FAIL;
	    }
	    memset(means, '\0', sizeof(float)*volsize);
	    indxyzt = 0;
	    for (indt = 0; indt < dimsizes[3]; indt++) {
		for (indxyz = 0; indxyz < volsize; indxyz++) {
		    /* running mean over the time points seen so far */
		    means[indxyz] +=
			(dataptr[indxyzt] - means[indxyz]) / (indt + 1);
		    indxyzt++;
		}
	    }
	    if (numranked == 0) {
		/* asked for zero voxels: empty mask */
		memset(results, '\0', sizeof(char)*volsize);
	    } else {
		/* sort means (using merge sort) and take as threshold the
		 * value of the numranked-th highest mean, so that at
		 * least numranked voxels satisfy mean >= threshold */
		float * sortedmeans = NULL;
		sortedmeans = mergesort_float(means, volsize);
		if (sortedmeans == NULL) {
		    fprintf(stderr, "%s: out of memory.\n", argv[0]);
		    free(means); means = NULL;
		    goto FAIL;
		}
		threshold = sortedmeans[volsize - numranked];
		free(sortedmeans);
		for (indxyz = 0; indxyz < volsize; indxyz++) {
		    results[indxyz] = (means[indxyz] >= threshold);
		}
	    }
	    free(means); means = NULL;
	} else if (strcmp(opt_method, "threshold") == 0) {
	    size_t indxyz = 0;
	    size_t indt = 0;
	    float mean = 0;
	    char * endptr = NULL;
	    float threshold = 0;
	    size_t volsize = pagesizes[2];
	    threshold = strtod(opt_filterthresh, &endptr);
	    if (endptr == opt_filterthresh ||
		(*endptr != '\0' && *endptr != '%')) {
		fprintf(stderr, "Bad threshold value '%s'!\n", opt_filterthresh);
		goto FAIL;
	    }
	    if (*endptr == '%') {
		/* percent of the maximum value found anywhere in the data */
		size_t indxyzt;
		float max = dataptr[0];
		float * curptr = dataptr;
		size_t datasize = pagesizes[3];
		for (indxyzt = 0; indxyzt < datasize; indxyzt++) {
		    if (*curptr > max) max = *curptr;
		    curptr++;
		}
		threshold = max * (threshold / 100.0);
	    }
	    for (indxyz = 0; indxyz < volsize; indxyz++) {
		/* running mean of this voxel over time */
		mean = 0;
		for (indt = 0; indt < dimsizes[3]; indt++) {
		    mean += (dataptr[indxyz + indt*volsize] - mean) / (indt + 1);
		}
		results[indxyz] = (mean >= threshold);
	    }
#ifdef HAVE_LIBGSL
	} else if (strcmp(opt_method, "localmin") == 0) {
	    /* algorithm based on "extract" function by Martin McKeown,
	     * via Josh Bizzell's tstatprofile2, but does not apply
	     * a threshold (50 in tstatprofile2) to the bins.
	     */
	    size_t volsize = pagesizes[2];
	    size_t indxyzt = 0;
	    size_t indxyz = 0;
	    size_t indt = 0;
	    size_t indbin = 0;
	    float * mins = NULL;
	    float * sortedmins = NULL;
	    float * hist = NULL;
	    float * histfit = NULL;
	    float histwidth = 0;
	    size_t numhistbins = 200;
	    float min = 0;
	    float max = 0;
	    float threshold = 0;
	    /* per-voxel minimum over time, seeded with the first volume */
	    mins = (float *)malloc(sizeof(float)*volsize);
	    if (mins == NULL) {
		fprintf(stderr, "%s: out of memory.\n", argv[0]);
		goto FAIL;
	    }
	    memcpy(mins, dataptr, sizeof(float)*volsize);
	    indxyzt = 0 + volsize;
	    for (indt = 1; indt < dimsizes[3]; indt++) { /* already copied first volume */
		for (indxyz = 0; indxyz < volsize; indxyz++) {
		    if (dataptr[indxyzt] < mins[indxyz]) {
			mins[indxyz] = dataptr[indxyzt];
		    }
		    indxyzt++;
		}
	    }
	    /* sorted copy is only used to get the overall min and max */
	    sortedmins = mergesort_float(mins, volsize);
	    if (sortedmins == NULL) {
		fprintf(stderr, "%s: out of memory.\n", argv[0]);
		free(mins); mins = NULL;
		goto FAIL;
	    }
	    min = sortedmins[0];
	    max = sortedmins[volsize-1];
	    free(sortedmins); sortedmins = NULL;
	    if (min == max) {
		fprintf(stderr, "The minimum value across time for all voxels is the same!  Check for zero-filled (or otherwise wacky) volumes in the data...\n");
		free(mins); mins = NULL;
		goto FAIL;
	    }
	    /* hist has one extra slot: a voxel equal to max lands in
	     * index numhistbins */
	    hist = (float *)malloc(sizeof(float)*(numhistbins+1));
	    histfit = (float *)malloc(sizeof(float)*numhistbins);
	    if (hist == NULL || histfit == NULL) {
		fprintf(stderr, "%s: out of memory.\n", argv[0]);
		free(hist); hist = NULL;
		free(histfit); histfit = NULL;
		free(mins); mins = NULL;
		goto FAIL;
	    }
	    histwidth = (max - min) / numhistbins;
	    memset(hist, '\0', sizeof(float)*(numhistbins+1));
	    memset(histfit, '\0', sizeof(float)*numhistbins);
	    for (indxyz = 0; indxyz < volsize; indxyz++) {
		size_t bin = (size_t)((mins[indxyz] - min) / histwidth);
		/* defend against floating-point round-off past the
		 * extra slot */
		if (bin > numhistbins) { bin = numhistbins; }
		hist[bin]++;
	    }
	    if (opt_debug) {
		fprintf(stdout, "hist:\n");
		for (indbin = 0; indbin < numhistbins; indbin++) {
		    fprintf(stdout, "%u %g (%g <= x < %g)\n", (unsigned int)indbin, hist[indbin], indbin*histwidth, (indbin+1)*histwidth);
		}
	    }
	    {
		/* least-squares fit of a polynomial in (bin index + 1)
		 * to the histogram counts */
		gsl_matrix * gslX = NULL;
		gsl_vector * gsly = NULL;
		gsl_vector * gslc = NULL;
		gsl_matrix * gslcov = NULL;
		size_t polyorder = opt_filterorder;
		double gslchisq;

		gslX = gsl_matrix_alloc(numhistbins, polyorder + 1);
		gsly = gsl_vector_alloc(numhistbins);
		gslc = gsl_vector_alloc(polyorder + 1);
		gslcov = gsl_matrix_alloc(polyorder + 1, polyorder + 1);
		for (indbin = 0; indbin < numhistbins; indbin++) {
		    size_t ordernum = 0;
		    double accum = 0;
		    /* 1-based indexing for matrix vals */
		    for (ordernum = 0, accum = 1;
			 ordernum < polyorder + 1;
			 ordernum++, accum *= (indbin + 1)) {
			gsl_matrix_set(gslX, indbin, ordernum, accum);
		    }
		    gsl_vector_set(gsly, indbin, (double)hist[indbin]);
		}
		{
		    gsl_multifit_linear_workspace * work = NULL;
		    work = gsl_multifit_linear_alloc(numhistbins, polyorder + 1);
		    gsl_multifit_linear(gslX, gsly, gslc, gslcov, &gslchisq, work);
		    gsl_multifit_linear_free(work);
		}
		/* evaluate the fitted polynomial at each bin */
		for (indbin = 0; indbin < numhistbins; indbin++) {
		    size_t ordernum = 0;
		    float accum = 0;
		    /* 1-based indexing for matrix vals */
		    histfit[indbin] = 0;
		    for (ordernum = 0, accum = 1;
			 ordernum < polyorder + 1;
			 ordernum++, accum *= (indbin + 1)) {
			histfit[indbin] +=
			    gsl_vector_get(gslc, ordernum) * accum;
		    }
		}
		if (opt_debug) {
		    fprintf(stdout, "histfit:\n");
		    for (indbin = 0; indbin < numhistbins; indbin++) {
			fprintf(stdout, "%u %g (%g <= x < %g)\n", (unsigned int)indbin, histfit[indbin], indbin*histwidth, (indbin+1)*histwidth);
		    }
		}
		gsl_matrix_free(gslX);
		gsl_vector_free(gsly);
		gsl_vector_free(gslc);
		gsl_matrix_free(gslcov);
	    }
	    /* fallback threshold (middle of first bin) if no local
	     * minimum of the fitted curve is found */
	    threshold = min + (histwidth * 0.5);
	    for (indbin = 1; indbin < numhistbins - 1; indbin++) {
		if (histfit[indbin-1] > histfit[indbin] &&
		    histfit[indbin] < histfit[indbin+1]) {
		    threshold = min + (histwidth * (indbin + 0.5));
		    break;
		}
	    }
	    if (opt_debug) {
		fprintf(stderr, "Chose localmin threshold %g (bin index %d).\n", threshold, (int)indbin);
	    }
	    /* the loop above ran to completion without a break */
	    if (indbin == numhistbins - 1) {
		fprintf(stderr, "Didn't find a local minimum!  Using threshold %g.\n", threshold);
	    }
	    for (indxyz = 0; indxyz < volsize; indxyz++) {
		results[indxyz] = (mins[indxyz] >= threshold);
	    }
	    free(hist); hist = NULL;
	    free(histfit); histfit = NULL;
	    free(mins); mins = NULL;
#endif /* HAVE_LIBGSL */
	} else {
	    fprintf(stderr, "Unsupported mask method '%s'\n", opt_method);
	    goto FAIL;
	}

	/* write out results */
	/* output datarec: copy of the input's, cut down to 3 dimensions,
	 * element type uint8, one fragment covering the whole mask */
	outdatarec = bxh_datarec_copy(bdr.datarec);
	while (outdatarec->numdims > 3) {
	    outdatarec->numdims--;
	    bxh_datarec_dimdata_free(&outdatarec->dimensions[outdatarec->numdims]);
	}
	outdatarec->numdims = 3;
	free(outdatarec->elemtype);
	outdatarec->elemtype = strdup("uint8");
	bxh_datarec_frags_free(outdatarec);
	bxh_datarec_addfrag(outdatarec, outputfile, 0, sizeof(char) * pagesizes[2], outputbxh, 1);
	if (bxh_datarec_writeToElement(bdr.imagedatap, outdatarec) != 0) {
	    fprintf(stderr, "Failed writing datarec\n");
	    goto FAIL;
	}
	/* temporarily point bdr at the output datarec for the writer */
	tmpdatarec = bdr.datarec;
	bdr.datarec = outdatarec;
	if (bxh_addAutoHistoryEntry(docp, argv[0], &inputfile, 1) != 0) {
	    fprintf(stderr, "Error adding history entry\n");
	    /* restore first, so bdr and outdatarec are not both
	     * released through the same pointer at EXIT */
	    bdr.datarec = tmpdatarec; tmpdatarec = NULL;
	    goto FAIL;
	}
	writeBXHAndNIIGZ(outputbase, &bdr, results, 0);
	bdr.datarec = tmpdatarec; tmpdatarec = NULL;

	free(results); results = NULL;
	bxh_datarec_free(outdatarec); outdatarec = NULL;
    }

    goto EXIT;

  FAIL:
    retval = -1;

  EXIT:
    bxh_datareaddata_free(&bdr);
    if (results) {
	free(results); results = NULL;
    }
    if (outputbxh)
	free(outputbxh);
    if (outputfile)
	free(outputfile);
    if (outputfilegz)
	free(outputfilegz);
    if (outputbase)
	free(outputbase);
    if (outdatarec)
	bxh_datarec_free(outdatarec);
    if (opt_method) {
	free(opt_method); opt_method = NULL;
    }
    if (opt_filterthresh) {
	free(opt_filterthresh); opt_filterthresh = NULL;
    }
    if (opt_filterrank) {
	free(opt_filterrank); opt_filterrank = NULL;
    }
    return retval;
}

/*
 * $Log: In-line log eliminated on transition to SVN; use svn log instead. $
 * Revision 1.17  2007/12/10 16:40:04  gadde
 * Write out gzipped NIFTI (with BXH/XCEDE headers) as default
 *
 * Revision 1.16  2006/06/01 15:19:41  gadde
 * Use float instead of double to save memory.
 *
 * Revision 1.15  2006/04/07 14:47:22  gadde
 * Zero out bdr before using (or freeing)!
 *
 * Revision 1.14  2005/11/02 15:32:02  gadde
 * Add option to select polynomial order for 'localmin' method
 *
 * Revision 1.13  2005/09/20 18:37:54  gadde
 * Updates to versioning, help and documentation, and dependency checking
 *
 * Revision 1.12  2005/09/19 16:31:56  gadde
 * Documentation and help message updates.
 *
 * Revision 1.11  2005/09/14 15:19:17  gadde
 * Some -Wall fixes.
 *
 * Revision 1.10  2005/09/14 14:49:29  gadde
 * Type conversion updates to fix win32 warnings
 *
 * Revision 1.9  2005/03/30 20:08:17  gadde
 * Put usage in help.
 *
 * Revision 1.8  2005/03/18 16:59:42  gadde
 * Allow 3-D data.
 *
 * Revision 1.7  2005/02/04 18:55:30  gadde
 * Bring polynomial order back down to 5, but actually do x^5 this time!
 *
 * Revision 1.6  2005/02/03 18:36:56  gadde
 * increase the polynomial order for method localmin, and add optional debugging messages
 *
 * Revision 1.5  2004/12/20 20:25:59  gadde
 * Add overwrite option.
 *
 * Revision 1.4  2004/12/16 22:09:28  gadde
 * Add "rank" method.
 *
 * Revision 1.3  2004/12/13 20:07:55  gadde
 * Initialize bdr before first FAIL.
 *
 * Revision 1.2  2004/12/13 20:07:05  gadde
 * Initialize bdr.
 *
 * Revision 1.1  2004/11/12 15:04:08  gadde
 * Initial commit.
 *
 */
