/*!
 * \file  Main.cpp
 * \brief Command line tool which performs ODVBA on a set of brain images.
 *
 * For copyright information please see Copyright.txt in the root
 * directory of the project.
 *
 * Contact: SBIA Group <sbia-software@uphs.upenn.edu>
 */

#include <stdio.h>
#include <time.h>

#include "SbiaOdvbaAlgorithm.h"
#include "SbiaOdvbaUtilities.h"

#include "MainAux.h"


/*!
 * \def   SBIA_ODVBA_USE_MPI
 * \brief Whether the parallelized MPI implementation is used.
 *
 * Defaults to 0 (single process build) unless defined by the build system.
 * If non-zero, <mpi.h> is included and main() initializes and finalizes MPI.
 */
#if !defined (SBIA_ODVBA_USE_MPI)
#  define SBIA_ODVBA_USE_MPI 0
#endif

#if SBIA_ODVBA_USE_MPI
#  include <mpi.h>
#endif

/*!
 * \def   SBIA_ODVBA_SYNC_STARTUP
 * \brief Whether to synchronize the startup, i.e., all processes wait until
 *        every process has been initialized before they start working.
 *
 * Defaults to 1 unless defined by the build system. Only has an effect if
 * SBIA_ODVBA_USE_MPI is non-zero; main() then lets the processes report the
 * host they run on one after the other and meet at a barrier afterwards.
 */
#if !defined (SBIA_ODVBA_SYNC_STARTUP)
#  define SBIA_ODVBA_SYNC_STARTUP 1
#endif


// make the ODVBA library symbols available without namespace qualification
using namespace sbia::odvba;


//////////////////////////////////////////////////////////////////////////////
// usage / help
//////////////////////////////////////////////////////////////////////////////

/*!
 * \brief Prints usage information / help to standard output.
 *
 * The version information is printed first, followed by a description of
 * the program, its arguments, and examples. Default values shown in the
 * help are taken from a default constructed Options instance.
 *
 * \param progName Name of program as displayed in the synopsis and examples.
 */
void usage (const char *progName)
{
	Options opt; // default options, used to display the default values

	version (progName);
	printf ("\n");
	printf ("Description:\n");
	printf ("  This program implements a group analysis method named\n");
	printf ("  Optimally-Discriminative Voxel-Based Analysis (ODVBA).\n");
	printf ("  \n");
	printf ("  T. Zhang and C. Davatzikos; ODVBA: Optimally-Discriminative\n");
	printf ("  Voxel-Based Analysis\n");
	printf ("\n");
	printf ("Usage:\n");
	printf ("  %s [options] <subjects.txt> [<index.txt>] [<NI.txt>]\n", progName);
	printf ("\n");
	printf ("Required options:\n");
	printf ("  <subjects.txt>       : The subjects list which specifies the input data given as NIfTI-1 images.\n");
	printf ("  [-s --sizeNI <real>] : Size of neighborhood in mm.\n");
	printf ("                         Required and used only if <NI.txt> file not provided.\n");
	printf ("  [-n --numNI <int>]   : Number of neighborhoods.\n");
	printf ("                         At most, the neighborhood around each non-zero voxel can be\n");
	printf ("                         considered, i.e., numNI <= m, where m is the number of non-zero voxels.\n");
	printf ("                         Required and used only if <NI.txt> file not provided.\n");
	printf ("  [-e --numVox <int>]  : Number of voxels used for each neighborhood.\n");
	printf ("                         Required and used only if <NI.txt> file not provided.\n");
	printf ("\n");
	printf ("Options:\n");
	printf ("  <index.txt>          : Location of non-zero voxels in the volumes.\n");
	printf ("  <NI.txt>             : Location of neighborhood for each voxel in the volumes.\n");
	printf ("  [--maps <filename>]  : Filename used to output the group analysis results.\n");
	printf ("                         If this option is not specified, only the final\n");
	printf ("                         image of p-values is written.\n");
	printf ("  [--perms <filename>] : If the number of permutation tests specified by the option numPerm\n");
	printf ("                         is zero, the pre-computed permutations stored in the given\n");
	printf ("                         file are used. Otherwise, the generated random permutations used for\n");
	printf ("                         the permutation tests are written to the specified file.\n");
	printf ("                         If this option is present but not numPerm, numPerm is assumed to\n");
	printf ("                         be zero, hence, the permutations will be read from the specified\n");
	printf ("                         file. This option is used for regression testing.\n");
	printf ("  [-o --out <prefix>]  : Filename prefix of the output volume of p-values which\n");
	printf ("                         will be written as NIfTI-1 image with voxel type float.\n");
	printf ("                         By default the image header is written to 'p.hdr' and the\n");
	printf ("                         raw image data is written to 'p.img'.\n");
	printf ("  [-p --numPerm <int>] : The number of permutations to test.\n");
	printf ("                         The default value is %d.\n", opt.nPerm);
	printf ("  [--phi <double>]     : Exponent phi of discrimination degree\n");
	printf ("                         (cf. Eq.(11) of MICCAI paper).\n");
	printf ("                         The default value is %f.\n", opt.phi);
	printf ("  [-h --help]          : Print help and exit.\n");
	printf ("  [-u --usage]         : Print usage information and exit.\n");
	printf ("  [-V --version]       : Print version information and exit.\n");
	printf ("  [-v --verbose]       : Enable verbose messages. Can be specified multiple times\n");
	printf ("                         to increase the verbosity.\n");
	printf ("\n");
	printf ("Example:\n");
	printf ("  %s -p 100 --phi 1 -n 10000 -e 400 -s 15 subjects.txt\n", progName);
	printf ("  %s -p 100 --phi 1 subjects.txt index.txt NI.txt\n", progName);
}

//////////////////////////////////////////////////////////////////////////////
// main
//////////////////////////////////////////////////////////////////////////////

/*!
 * \brief Finalizes the MPI environment if MPI support is compiled in.
 *
 * Does nothing in the non-MPI build. Called by main() right before the
 * process exits, on success as well as on failure.
 */
static void finalizeProcess ()
{
#if SBIA_ODVBA_USE_MPI
	MPI_Finalize ();
#endif // SBIA_ODVBA_USE_MPI
}

/*!
 * \brief Converts a neighborhood size given in mm into a number of voxels.
 *
 * The size is first converted into the spatial units of the image before it
 * is divided by the voxel spacing. One half voxel is subtracted before
 * rounding because createNI accounts for it.
 *
 * \param [in] sizeMM   Size of the neighborhood in mm.
 * \param [in] spacing  Voxel spacing along the considered axis, given in
 *                      the spatial units of the image (\p xyzUnits).
 * \param [in] xyzUnits NIfTI-1 spatial units code of the image. Codes other
 *                      than NIFTI_UNITS_MICRON and NIFTI_UNITS_METER are
 *                      treated as mm.
 *
 * \return Size of the neighborhood in voxels; at least 1. If \p spacing is
 *         not positive, 1 is returned.
 */
static int neighborhoodSizeInVoxels (double sizeMM, double spacing, int xyzUnits)
{
	// express the requested size in the units used by the voxel spacing
	double size = sizeMM;

	if      (xyzUnits == NIFTI_UNITS_MICRON) size *= 1000.0; // 1 mm = 1000 micron
	else if (xyzUnits == NIFTI_UNITS_METER)  size /= 1000.0; // 1 mm = 0.001 m

	// invalid spacing; avoid division by zero
	if (!(spacing > 0)) return 1;

	const int nVox = static_cast <int> (round (size / spacing - 0.5));

	return (nVox <= 0) ? 1 : nVox;
}

/*!
 * \brief Main function of program.
 *
 * Parses the command line, reads the input data, distributes the
 * permutation tests among the available processes (MPI build only),
 * performs the group analysis, lets the master process (rank 0) collect
 * the results and, if requested, the generated permutations of the other
 * processes, and finally writes the outputs on the master process.
 *
 * \note If more than one process is used, the master process performs the
 *       initial analysis only; the permutation tests are distributed among
 *       the other processes.
 *
 * \note NOTE(review): a process which fails exits on its own; other
 *       processes that wait for its messages are not notified.
 *
 * \param [in] argc Number of command line arguments.
 * \param [in] argv Command line arguments.
 *
 * \return Exit status of program.
 *
 * \retval EXIT_SUCCESS on success.
 * \retval EXIT_FAILURE on failure.
 */
int main (int argc, char *argv[])
{
	const char *progName = getProgName (argv);
	bool        ok       = true;

	// -----------------------------------------------------------------------
	// initialize process
	// -----------------------------------------------------------------------

	int nProc = 1;
	int rank  = 0;

#if SBIA_ODVBA_USE_MPI
	char procName [MPI_MAX_PROCESSOR_NAME];
	int  procNameLen = 0;

	MPI_Init (&argc, &argv);
	MPI_Comm_size (MPI_COMM_WORLD, &nProc);
	MPI_Comm_rank (MPI_COMM_WORLD, &rank);
	MPI_Get_processor_name (procName, &procNameLen);
#endif // SBIA_ODVBA_USE_MPI

	// process ID used in messages; the buffer is big enough for any value
	// of type int and snprintf guarantees that it is never overrun
	char procId [32];

	if (nProc > 1) snprintf (procId, sizeof (procId), "(Process %d) ", rank + 1);
	else           procId [0] = '\0';

	// -----------------------------------------------------------------------
	// options
	// -----------------------------------------------------------------------

	static struct option long_options [] =
	{
		{"out",           required_argument, NULL, 'o'},
		{"maps",          required_argument, NULL, 'm'},
		{"perms",         required_argument, NULL, 'r'},
		{"numPerm",       required_argument, NULL, 'p'},
		{"phi",           required_argument, NULL, 'a'},
		{"numNI",         required_argument, NULL, 'n'},
		{"numVox",        required_argument, NULL, 'e'},
		{"sizeNI",        required_argument, NULL, 's'},
		{"usage",         no_argument,       NULL, 'u'},
		{"help",          no_argument,       NULL, 'h'},
		{"version",       no_argument,       NULL, 'V'},
		{"Version",       no_argument,       NULL, 'V'},
		{"verbose",       no_argument,       NULL, 'v'},
		{0, 0, 0, 0}
	}; // struct long_options

	int c      = -1;
	int optidx = 0;

	// default options
	int         verbosity   = 0;        // verbosity of messages
	Options     opt;                    // options of ODVBA algorithm
	const char *sublistFile = NULL;     // subjects list
	const char *indexFile   = NULL;     // index file
	const char *NIFile      = NULL;     // neighborhood file
	const char *permsFile   = NULL;     // permutations file (regression testing)
	const char *mapsFile    = NULL;     // group analysis results output file
	const char *pImageFile  = "p.hdr";  // output image of p-values
	int         numNI       = 0;        // number of neighborhoods
	double      sizNI       = 0;        // size of each neighborhood in mm
	int         numVox      = 0;        // number of voxels sampled from each neighborhood
	bool        numPermOpt  = false;    // whether the number of permutations was specified

	// parse command line options
	// note: the options --maps, --perms, and --phi have no short form
	while ((c = getopt_long (argc, argv, "o:p:n:e:s:uhVv", long_options, &optidx)) != -1)
	{
		switch (c)
		{
		case 'o':
			pImageFile = optarg;
			break;

		case 'm':
			mapsFile = optarg;
			break;

		case 'p':
			opt.nPerm = atoi (optarg);
			numPermOpt = true;
			break;

		case 'r':
			permsFile = optarg;
			break;

		case 'a':
			opt.phi = atof (optarg);
			break;

		case 'n':
			numNI = atoi (optarg);
			break;

		case 'e':
			numVox = atoi (optarg);
			break;

		case 's':
			sizNI = atof (optarg);
			break;

		case 'u':
			// fall-through intended
		case 'h':
			if (rank == 0) usage (progName);
			finalizeProcess ();
			exit (EXIT_SUCCESS);

		case 'V':
			if (rank == 0) version (progName);
			finalizeProcess ();
			exit (EXIT_SUCCESS);

		case 'v':
			++ verbosity;
			break;

		case '?':
			// getopt_long already printed an error message
			if (rank == 0) usage (progName);
			finalizeProcess ();
			exit (EXIT_FAILURE);
		}
	}

	argc -= optind;
	argv += optind;

	if (argc < 1 || argc > 3)
	{
		if (rank == 0) usage (progName);
		finalizeProcess ();
		exit (EXIT_FAILURE);
	}

	sublistFile = argv [0];
	if (argc > 1) indexFile = argv [1];
	if (argc > 2) NIFile    = argv [2];

	// read permutations from file if number of permutations was not
	// specified but a permutations file
	if (!numPermOpt && permsFile) opt.nPerm = 0;

	// are all required inputs specified?
	// note: non-positive neighborhood parameters are as invalid as missing ones
	if (!sublistFile || (!NIFile && (numNI <= 0 || numVox <= 0 || sizNI <= 0)) || !nifti_validfilename (pImageFile))
	{
		if (rank == 0)
		{
			printf ("Not all required inputs were specified!\n\n");
			usage (progName);
		}
		finalizeProcess ();
		exit (EXIT_FAILURE);
	}

	// check options
	if (opt.nPerm < 0 && !permsFile)
	{
		if (rank == 0)
		{
			printf ("Invalid number of permutations!\n\n");
			usage (progName);
		}
		finalizeProcess ();
		exit (EXIT_FAILURE);
	}

	// -----------------------------------------------------------------------
	// print process information / synchronize processes
	// -----------------------------------------------------------------------

#if SBIA_ODVBA_USE_MPI
#  if SBIA_ODVBA_SYNC_STARTUP
	MPI_Status status;
	int        token = 0;

	clock_t startTimeSync = clock (); // time when synchronization was initiated

	if (rank == 0)
	{
		// print information about process
		printf ("%sRunning on '%s'\n", procId, procName);
		fflush (stdout);

		for (int i = 1; i < nProc; ++ i)
		{
			// send off slave
			MPI_Send (&token, 1, MPI_INT, i, 0, MPI_COMM_WORLD);

			// wait until slave process printed its process information
			MPI_Recv (&token, 1, MPI_INT, i, 0, MPI_COMM_WORLD, &status);
		}
	}
	else
	{
		// wait for master process to send go signal
		MPI_Recv (&token, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);

		// print information about process
		printf ("%sRunning on '%s'\n", procId, procName);
		fflush (stdout);

		// let master send off the next slave
		MPI_Send (&token, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
	}

	MPI_Barrier (MPI_COMM_WORLD);

	if (rank == 0 && nProc > 1)
	{
		double duration = static_cast <double> (clock () - startTimeSync) / CLOCKS_PER_SEC;
		printf ("%sProcesses synchronized in %.2f sec\n", procId, duration);
	}
#  else // SBIA_ODVBA_SYNC_STARTUP
	printf ("%sRunning on '%s'\n", procId, procName);
	fflush (stdout);
#  endif // SBIA_ODVBA_SYNC_STARTUP
#endif // SBIA_ODVBA_USE_MPI

	// -----------------------------------------------------------------------
	// run...
	// -----------------------------------------------------------------------

	// start time (processor time as returned by clock ())
	clock_t startTime = clock ();

	// permutation test mode (regression testing)
	const int PERMMODE_NONE  = 0; // do not read/write permutations from/to file
	const int PERMMODE_WRITE = 1; // generate random permutations and write them to file
	const int PERMMODE_READ  = 2; // read permutations from file

	const int permMode = permsFile
	                     ? ((opt.nPerm > 0) ? PERMMODE_WRITE : PERMMODE_READ)
	                     : PERMMODE_NONE;

	// -----------------------------------------------------------------------
	// input
	// -----------------------------------------------------------------------

	// read database
	CvMat         *data = NULL;
	Database       db;
	nifti_1_header hdr; // image header of first input image; used below for image output

	if (ok)
	{
		printf ("%sParsing subject list and reading image data\n", procId);
		fflush (stdout);

		ok = ((data = readData  (sublistFile, &db.n1, &hdr)) != NULL);

		if (!ok)
		{
			fprintf (stderr, "%sFailed to read image data\n", procId);
			fflush (stderr);
		}
	}

	// read/create index
	if (ok)
	{
		if (indexFile)
		{
			printf ("%sReading index\n", procId);
			fflush (stdout);

			ok = ((db.index = readCvMat (indexFile)) != NULL);

			if (!ok)
			{
				fprintf (stderr, "%sFailed to read index\n", procId);
				fflush (stderr);
			}
		}
		else
		{
			printf ("%sGenerating index\n", procId);
			fflush (stdout);

			ok = ((db.index = createIndex (data)) != NULL);

			if (!ok)
			{
				fprintf (stderr, "%sFailed to generate index\n", procId);
				fflush (stderr);
			}
		}
	}

	// read/create neighborhood
	if (ok)
	{
		if (NIFile)
		{
			printf ("%sReading neighborhoods\n", procId);
			fflush (stdout);

			ok = ((db.NI = readCvMat (NIFile)) != NULL);

			if (!ok)
			{
				fprintf (stderr, "%sFailed to read neighborhoods\n", procId);
				fflush (stderr);
			}
		}
		else
		{
			printf ("%sGenerating neighborhoods\n", procId);
			fflush (stdout);

			// size of neighborhoods in voxels along each dimension; the size
			// given in mm is converted to the spatial units of the image
			const int xyz_units = XYZT_TO_SPACE (hdr.xyzt_units);

			const int sizNIx = neighborhoodSizeInVoxels (sizNI, hdr.pixdim [1], xyz_units);
			const int sizNIy = neighborhoodSizeInVoxels (sizNI, hdr.pixdim [2], xyz_units);
			const int sizNIz = neighborhoodSizeInVoxels (sizNI, hdr.pixdim [3], xyz_units);

			ok = ((db.NI = createNI (db.index, hdr.dim[1], hdr.dim[2], hdr.dim[3], sizNIx, sizNIy, sizNIz, numNI, numVox)) != NULL);

			if (!ok)
			{
				fprintf (stderr, "%sFailed to generate neighborhoods\n", procId);
				fflush (stderr);
			}
		}
	}

	// extract image data of non-zero voxels
	if (ok)
	{
		printf ("%sExtracting image data\n", procId);
		fflush (stdout);

		ok = ((db.X = createX (data, db.index)) != NULL);

		if (!ok)
		{
			fprintf (stderr, "%sFailed to extract image data\n", procId);
			fflush (stderr);
		}
	}

	// intermediate clean up
	if (data)
	{
		cvReleaseMat (&data);
		data = NULL;
	}

	// read permutations
	if (ok && permMode == PERMMODE_READ)
	{
		printf ("%sReading permutations\n", procId);
		fflush (stdout);

		ok = ((opt.perms = readCvMat (permsFile)) != NULL);

		if (!ok)
		{
			fprintf (stderr, "%sFailed to read permutations\n", procId);
			fflush (stderr);
		}
	}

	// everything ok?
	if (!ok)
	{
		db.release ();

		finalizeProcess ();
		exit (EXIT_FAILURE);
	}

	// data related constants
	const int n = db.X->cols;            // number of subjects
	const int p = ((opt.nPerm > 0)       // number of permutations (total)
	                ? opt.nPerm
	                : (opt.perms
	                   ? opt.perms->rows
	                   : 0));

	// -----------------------------------------------------------------------
	// prepare processing
	// -----------------------------------------------------------------------

	// number of permutations per process; needed until the permutations
	// were assembled, hence, released at the very end only
	int *nPerms = new int [nProc];

	// distribute work among processes
	for (int i = 0; i < nProc; ++ i) nPerms [i] = 0;

	if (nProc == 1)
	{
		nPerms [0] = p;
	}
	else
	{
		// round-robin among the slave processes; the master process
		// performs the initial analysis only
		int i = 1;
		for (int np = 0; np < p; ++ np, ++ i)
		{
			if (i == nProc) i = 1;
			++ nPerms [i];
		}
	}

	// adjust permutations for this process
	if (permMode == PERMMODE_READ)
	{
		opt.nPerm = 0;

		// note: opt.perms is not NULL here as reading it succeeded above
		if (opt.perms->cols != n)
		{
			// each permutation has to consist of one entry per subject;
			// otherwise, the rows cannot be split among the processes
			fprintf (stderr, "%sNumber of columns of permutations does not match number of subjects\n", procId);
			fflush (stderr);

			ok = false;
		}
		else if (nProc > 1)
		{
			// keep only the permutations assigned to this process
			CvMat *perms = NULL;

			if (nPerms [rank] > 0)
			{
				perms = cvCreateMat (nPerms [rank], n, CV_32FC1);

				if (perms)
				{
					int nSkip = 0;
					for (int i = 0; i < rank; ++ i) nSkip += nPerms [i];

					memcpy (perms->data.fl, opt.perms->data.fl + nSkip * n, static_cast <size_t> (nPerms [rank]) * n * sizeof (float));
				}
				else
				{
					fprintf (stderr, "%sFailed to allocate memory\n", procId);
					fflush (stderr);

					ok = false;
				}
			}

			cvReleaseMat (&opt.perms);
			opt.perms = perms;
		}
	}
	else if (permMode == PERMMODE_WRITE)
	{
		opt.nPerm = nPerms [rank];
		opt.perms = cvCreateMat (nPerms [rank], n, CV_32FC1);

		if (!opt.perms)
		{
			fprintf (stderr, "%sFailed to allocate memory\n", procId);
			fflush (stderr);

			ok = false;
		}
	}
	else
	{
		opt.nPerm = nPerms [rank];
	}

	// everything ok?
	if (!ok)
	{
		delete [] nPerms;
		nPerms = NULL;

		db.release ();

		finalizeProcess ();
		exit (EXIT_FAILURE);
	}

	// -----------------------------------------------------------------------
	// perform group analysis
	// -----------------------------------------------------------------------

	clock_t startTimeAnalysis = clock (); // time when processing started

	CvMat *maps = NULL; // group analysis results

	if (rank == 0)
	{
		if (nPerms [rank] > 0) printf ("%sPerforming initial analysis and %d permutation test(s)\n", procId, nPerms [rank]);
		else                   printf ("%sPerforming initial analysis\n", procId);
	}
	else
	{
		if (nPerms [rank] > 0) printf ("%sPerforming %d permutation test(s)\n", procId, nPerms [rank]);
		else                   printf ("%sNothing to do\n", procId);
	}
	fflush (stdout);

	if (rank == 0 || opt.nPerm > 0 || (opt.perms && opt.perms->rows > 0))
	{
		// only the master process performs the initial analysis
		ok = ((maps = performAnalysis (db, opt, ((rank == 0) ? true : false), verbosity, procId)) != NULL);

		if (ok)
		{
			double t = static_cast <double> (clock() - startTimeAnalysis) / CLOCKS_PER_SEC;
			printf ("%sPerformed group analysis in %.2f sec\n", procId, t);
			fflush (stdout);
		}
		else
		{
			fprintf (stderr, "%sFailed to perform group analysis\n", procId);
			fflush (stderr);
		}
	}

	// everything ok?
	if (!ok)
	{
		if (maps)
		{
			cvReleaseMat (&maps);
			maps = NULL;
		}

		delete [] nPerms;
		nPerms = NULL;

		db.release ();

		finalizeProcess ();
		exit (EXIT_FAILURE);
	}

	// -----------------------------------------------------------------------
	// assemble results
	// -----------------------------------------------------------------------

#if SBIA_ODVBA_USE_MPI
	// if subprocess...
	if (rank > 0)
	{
		// send results to master process
		if (nPerms [rank] > 0)
		{
			printf ("%sWaiting for process 1 to collect results\n", procId);
			fflush (stdout);

			if (MPI_Send (maps->data.fl, maps->rows * maps->cols, MPI_FLOAT, 0, 0, MPI_COMM_WORLD) == MPI_SUCCESS)
			{
				printf ("%sSent results to process 1\n", procId);
				fflush (stdout);
			}
			else
			{
				fprintf (stderr, "%sFailed to send results to process 1\n", procId);
				fflush (stderr);

				ok = false;
			}
		}
	}
	// if master (and not only) process...
	else if (nProc > 1)
	{
		// allocate memory for assembled results, copy results of master
		// process and replace maps by matrix big enough for all results
		// (one row for the initial analysis plus one row per permutation)
		printf ("%sAllocating memory for collective results\n", procId);
		fflush (stdout);

		CvMat *maps2 = NULL;

		ok = ((maps2 = cvCreateMat (1 + p, maps->cols, CV_32FC1)) != NULL);

		if (ok)
		{
			printf ("%sFilling in own results\n", procId);
			fflush (stdout);

			memcpy (maps2->data.fl, maps->data.fl, static_cast <size_t> (1 + nPerms [0]) * maps->cols * sizeof (float));

			cvReleaseMat (&maps);
			maps = maps2;
		}
		else
		{
			fprintf (stderr, "%sFailed to allocate memory\n", procId);
			fflush (stderr);
		}

		// collect results of other processes
		if (ok)
		{
			printf ("%sCollecting results from the other process(es)\n", procId);
			fflush (stdout);

			float *ptr = maps->data.fl + (1 + nPerms [0]) * maps->cols;

			for (int i = 1; i < nProc; ++ i)
			{
				if (nPerms [i] > 0)
				{
					MPI_Status status;

					if (MPI_Recv (ptr, nPerms [i] * maps->cols, MPI_FLOAT, i, 0, MPI_COMM_WORLD, &status) == MPI_SUCCESS)
					{
						printf ("%sReceived results from process %d\n", procId, i + 1);
						fflush (stdout);
					}
					else
					{
						fprintf (stderr, "%sFailed to collect results from process %d\n", procId, i + 1);
						fflush (stderr);

						ok = false;
					}

					ptr += nPerms [i] * maps->cols;
				}
				else
				{
					printf ("%sProcess %d had nothing to do\n", procId, i + 1);
					fflush (stdout);
				}
			}
		}
	}
#endif // SBIA_ODVBA_USE_MPI
	// otherwise, there is nothing to assemble...

	// everything ok?
	if (!ok)
	{
		if (maps)
		{
			cvReleaseMat (&maps);
			maps = NULL;
		}

		delete [] nPerms;
		nPerms = NULL;

		db.release ();

		finalizeProcess ();
		exit (EXIT_FAILURE);
	}

	// -----------------------------------------------------------------------
	// assemble permutations
	// -----------------------------------------------------------------------

	if (permMode == PERMMODE_WRITE)
	{
#if SBIA_ODVBA_USE_MPI
		// if subprocess...
		if (rank > 0)
		{
			// send permutations to master process; as the master process
			// only receives from processes which had work to do, nothing
			// must be sent otherwise
			if (nPerms [rank] > 0)
			{
				printf ("%sWaiting for process 1 to collect permutations\n", procId);
				fflush (stdout);

				if (MPI_Send (opt.perms->data.fl, nPerms [rank] * opt.perms->cols, MPI_FLOAT, 0, 0, MPI_COMM_WORLD) == MPI_SUCCESS)
				{
					printf ("%sSent permutations to process 1\n", procId);
					fflush (stdout);
				}
				else
				{
					fprintf (stderr, "%sFailed to send permutations to process 1\n", procId);
					fflush (stderr);

					ok = false;
				}
			}
		}
		// if master (and not only) process...
		else if (nProc > 1)
		{
			// allocate memory for assembled permutations, copy permutations of
			// master process and replace perms by matrix big enough for all
			// permutations (one row per permutation)
			printf ("%sAllocating memory for collective permutations\n", procId);
			fflush (stdout);

			CvMat *perms2 = NULL;

			ok = ((perms2 = cvCreateMat (p, n, CV_32FC1)) != NULL);

			if (ok)
			{
				if (nPerms [0] > 0)
				{
					printf ("%sFilling in own permutations\n", procId);
					fflush (stdout);

					memcpy (perms2->data.fl, opt.perms->data.fl, static_cast <size_t> (nPerms [0]) * n * sizeof (float));
				}

				// always switch to the big matrix, also if the master process
				// has no own permutations, as the permutations of the other
				// processes are received into it below
				if (opt.perms) cvReleaseMat (&opt.perms);
				opt.perms = perms2;
			}
			else
			{
				fprintf (stderr, "%sFailed to allocate memory\n", procId);
				fflush (stderr);
			}

			// collect permutations of other processes
			if (ok)
			{
				printf ("%sCollecting permutations from the other process(es)\n", procId);
				fflush (stdout);

				float *ptr = opt.perms->data.fl + nPerms [0] * n;

				for (int i = 1; i < nProc; ++ i)
				{
					if (nPerms [i] > 0)
					{
						MPI_Status status;

						if (MPI_Recv (ptr, nPerms [i] * n, MPI_FLOAT, i, 0, MPI_COMM_WORLD, &status) == MPI_SUCCESS)
						{
							printf ("%sReceived permutations from process %d\n", procId, i + 1);
							fflush (stdout);
						}
						else
						{
							fprintf (stderr, "%sFailed to collect permutations from process %d\n", procId, i + 1);
							fflush (stderr);

							ok = false;
						}

						ptr += nPerms [i] * n;
					}
					else
					{
						printf ("%sProcess %d had nothing to do\n", procId, i + 1);
						fflush (stdout);
					}
				}
			}
		}
#endif // SBIA_ODVBA_USE_MPI
		// otherwise, nothing to assemble...
	}

	// -----------------------------------------------------------------------
	// output results (optional; master process only)
	// -----------------------------------------------------------------------

	if (rank == 0 && mapsFile)
	{
		printf ("%sWriting group analysis results to file '%s'\n", procId, mapsFile);
		fflush (stdout);

		if (!writeCvMat (mapsFile, maps, "%f"))
		{
			fprintf (stderr, "%sFailed to write group analysis results\n", procId);
			fflush (stderr);

			// report the failure via the exit status as for the other outputs
			ok = false;
		}
	}

	// -----------------------------------------------------------------------
	// output permutations (optional; master process only)
	// -----------------------------------------------------------------------

	if (rank == 0 && permMode == PERMMODE_WRITE)
	{
		printf ("%sWriting permutations to file '%s'\n", procId, permsFile);
		fflush (stdout);

		if (!writeCvMat (permsFile, opt.perms, "%.0f"))
		{
			fprintf (stderr, "%sFailed to write permutations\n", procId);
			fflush (stderr);

			ok = false;
		}
	}

	// -----------------------------------------------------------------------
	// output p-image (master process only)
	// -----------------------------------------------------------------------

	if (rank == 0)
	{
		CvMat *pImage = NULL;

		printf ("%sWriting p-image to file '%s'\n", procId, pImageFile);
		fflush (stdout);

		pImage = getPImage (maps, db.index, hdr.dim[1] * hdr.dim[2] * hdr.dim[3]);

		if (pImage)
		{
			// reuse header of first input image; adjust intent and description
			hdr.intent_code = NIFTI_INTENT_PVAL;

			strncpy (hdr.descrip,     "Output p-values of performed ODVBA group analysis", 80);
			strncpy (hdr.intent_name, nifti_intent_string (hdr.intent_code),               16);

			if (!writeNiftiImage (pImageFile, hdr, pImage))
			{
				fprintf (stderr, "%sFailed to write p-image\n", procId);
				fflush (stderr);

				ok = false;
			}

			cvReleaseMat (&pImage);
			pImage = NULL;
		}
		else
		{
			fprintf (stderr, "%sFailed to generate p-image\n", procId);
			fflush (stderr);

			ok = false;
		}
	}

	// -----------------------------------------------------------------------
	// finalize
	// -----------------------------------------------------------------------

	delete [] nPerms;
	nPerms = NULL;

	if (maps)
	{
		cvReleaseMat (&maps);
		maps = NULL;
	}

	if (opt.perms)
	{
		cvReleaseMat (&opt.perms);
		opt.perms = NULL;
	}

	db.release ();

	double t = static_cast <double> (clock() - startTime) / CLOCKS_PER_SEC;
	printf ("%sFinished in %.2f sec\n", procId, t);

	finalizeProcess ();

	exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
}
