static const char rcsid[] = "$Id: bxh_eventresp.cpp,v 1.13 2006-11-02 14:30:43 gadde Exp $";

/*
 * bxh_eventresp.cpp --
 * 
 *  Given event files, tie response events to the closest stimulus
 *  event that matches, with restrictions.
 */

#include <bxh_config.h>

#include <stdio.h>

#ifdef WIN32
#define strcasecmp stricmp
#endif

#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#ifndef WIN32
#include <unistd.h>
#endif
#include <math.h>

#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/xpath.h>
#include <libxml/xpathInternals.h>

#include <string>
#include <map>
#include <vector>

#include "bxh_datarec.h"
#include "bxh_eventlib.h"
#include "opts.h"

#ifndef XMLH_VERSIONSTR
#define XMLH_VERSIONSTR "(no version specified)"
#endif

#define VERBOSE 0

#undef FUNC
#define FUNC "get_event_list"
static
xmlDocPtr
get_event_list(std::vector<std::string> eventfilelist,
	       char * stimquery, char * stimfilterquery,
	       char * respquery, char * respfilterquery,
	       std::multimap<double, xmlNodePtr> & stimlist,
	       std::multimap<double, xmlNodePtr> & resplist,
	       FILE * logfp)
{
    xmlDocPtr retval = NULL;
    const char * queryprefix = "//*[local-name()='events']/*[local-name()='event']";
    std::vector<xmlDocPtr> docs;
    xmlDocPtr mergedoc = NULL;
    xmlDocPtr sortdoc = NULL;
    xmlDocPtr canondoc = NULL;
    size_t indefile = 0;
    const char * stimlabel = "STIMULUS";
    const char * resplabel = "RESPONSE";
    std::map<double, qvec> stimmap;
    std::map<double, qvec> respmap;

    for (indefile = 0; indefile < eventfilelist.size(); indefile++) {
	xmlDocPtr doc = xmlParseFile(eventfilelist[indefile].c_str());
	if (doc == NULL) {
	    fprintf(stderr, "Error: could not parse file %s\n", eventfilelist[indefile].c_str());
	    goto FAIL;
	}
	docs.push_back(doc);
    }

    if ((mergedoc = merge_event_lists(docs, queryprefix, logfp)) == NULL) {
	fprintf(stderr, "Error merging event files!\n");
	goto FAIL;
    }
    if ((sortdoc = xmlCopyDoc(mergedoc, 1)) == NULL) {
	fprintf(stderr, "Error copying XML doc!\n");
	goto FAIL;
    }
    if (sort_event_list(sortdoc, queryprefix, logfp) != 0) {
	fprintf(stderr, "Error sorting events!\n");
	goto FAIL;
    }
    if ((canondoc = xmlCopyDoc(sortdoc, 1)) == NULL) {
	fprintf(stderr, "Error copying XML doc!\n");
	goto FAIL;
    }
    if (canonicalize_event_list(canondoc, queryprefix, logfp) != 0) {
	fprintf(stderr, "Error canonicalizing events!\n");
	goto FAIL;
    }

#if VERBOSE
    xmlDocDump(logfp, canondoc);
#endif

    if (match_events(sortdoc, 1, &stimquery, (char **)&stimlabel, stimmap, logfp) != 0) {
	goto FAIL;
    }
    if (filter_events(canondoc, 1, &stimfilterquery, (char **)&stimlabel, stimmap, logfp) != 0) {
	goto FAIL;
    }

    if (match_events(sortdoc, 1, &respquery, (char **)&resplabel, respmap, logfp) != 0) {
	goto FAIL;
    }
    if (filter_events(canondoc, 1, &respfilterquery, (char **)&resplabel, respmap, logfp) != 0) {
	goto FAIL;
    }

    {
	std::map<double, qvec>::iterator stimiter = stimmap.begin();
	std::map<double, qvec>::iterator respiter = respmap.begin();
	stimlist.clear();
	resplist.clear();
	while (stimiter != stimmap.end()) {
	    size_t numnodes;
	    size_t nodenum;
	    std::vector<xmlNodePtr> & qnodes = (*stimiter).second[0].qnodes;
#if VERBOSE
	    fprintf(stderr, "Adding stimulus %g to list\n", (*stimiter).first);
#endif
	    numnodes = qnodes.size();
	    for (nodenum = 0; nodenum < numnodes; nodenum++) {
		stimlist.insert(std::pair<double,xmlNodePtr>((*stimiter).first, qnodes[nodenum]));
	    }
	    stimiter++;
	}
	while (respiter != respmap.end()) {
	    size_t numnodes;
	    size_t nodenum;
	    std::vector<xmlNodePtr> & qnodes = (*respiter).second[0].qnodes;
#if VERBOSE
	    fprintf(stderr, "Adding response %g to list\n", (*respiter).first);
#endif
	    numnodes = qnodes.size();
	    for (nodenum = 0; nodenum < numnodes; nodenum++) {
		resplist.insert(std::pair<double,xmlNodePtr>((*respiter).first, qnodes[nodenum]));
	    }
	    respiter++;
	}
    }

    goto EXIT;
    
  FAIL:
    retval = NULL;
    
  EXIT:
    {
	size_t docind;
	for (docind = 0; docind < docs.size(); docind++) {
	    if (docs[docind])
		xmlFreeDoc(docs[docind]);
	}
    }
    if (mergedoc)
	xmlFreeDoc(mergedoc);
    if (canondoc)
	xmlFreeDoc(canondoc);
    retval = sortdoc;
    return retval;
}

#undef FUNC
#define FUNC "main"
int
main(int argc, char *argv[])
{
    struct stat statbuf;
    int retval = 0;
    int argind;
    const char * outputfile = NULL;
    FILE * logfp = NULL;
    xmlDocPtr sortdoc = NULL;
    
    char * opt_optsfromfile = NULL;
    char * opt_stimquery = NULL;
    char * opt_stimfilterquery = NULL;
    char * opt_respquery = NULL;
    char * opt_respfilterquery = NULL;
    const char * opt_querylang = "XPath";
    double opt_maxresptime = -1;
    char * opt_respdelayname = NULL;
    char * opt_embeddedrespdelayvalues = NULL;
    int opt_overwrite = 0;
    int opt_version = 0;
    int opt_reversemerge = 0;

#define NUMMOVEVALUES 20
    char * opt_movevalues[NUMMOVEVALUES+1];
    const char * opt_movedefault = "keep";
    char movedefaultchar;

    char * xpstimquery = NULL;
    char * xpstimfilterquery = NULL;
    char * xprespquery = NULL;
    char * xprespfilterquery = NULL;
    char * xprespdelayquery = NULL;

    std::multimap<double, xmlNodePtr> stimlist;
    std::multimap<double, xmlNodePtr> resplist;

    std::vector<std::string> eventfilelist;
    
    const int numopts = 15;
    opt_data opts[15] = {
	{ 0x0, OPT_VAL_NONE, NULL, 0, "",
	  "Usage:\n"
	  "  bxh_eventresp [opts] eventfiles... outputfile\n\n"
	  "This program takes event files as input, and selects stimulus "
	  "and response events (given user-specified queries).  "
	  "The responses are then merged into the closest stimulus event "
	  "within a given time interval from the response." },
	{ 0x0, OPT_VAL_NONE, NULL, 0, "", "" },
	{ OPT_FLAGS_FULL, OPT_VAL_BOOL, &opt_version, 1, "version",
	  "Print version string and exit." },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_optsfromfile, 1, "optsfromfile",
	  "Program options (i.e. those starting with '--') will come from "
	  "this file.  "
	  "If this option is specified, then the options in the file "
	  "will be applied after all command-line options.  "
	  "The options (and their arguments) should be specified "
	  "one per line, with the leading '--' omitted." },
	{ OPT_FLAGS_FULL, OPT_VAL_BOOL, &opt_overwrite, 1, "overwrite",
	  "Overwrite existing output files (otherwise error and exit). " },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_querylang, 1, "querylanguage",
	  "The language used for all queries.  Valid values are 'XPath' and "
	  "'event'.  Case is irrelevant.  Default is "
	  "'XPath'." },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_stimquery, 1, "stimquery",
	  "A query string to match stimulus events.  "
	  "This option is required." },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_stimfilterquery, 1, "stimfilterquery",
	  "A query string to filter stimulus events." },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_respquery, 1, "respquery",
	  "A query string to match response events.  "
	  "This option is required." },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_respfilterquery, 1, "respfilterquery",
	  "A query string to filter stimulus events." },
	{ OPT_FLAGS_FULL, OPT_VAL_DOUBLE, &opt_maxresptime, 1, "maxresptime",
	  "Specifies the longest time interval (in the same units as the "
	  "onsets in the input file) within which a response can be "
	  "associated with a stimulus.  A negative value represents infinity "
	  "(default)." },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_respdelayname, 1, "respdelayname",
	  "The name to be used to label the value for response delay "
	  "(time of response minus time of stimulus).  Default is not to "
	  "add this value." },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_embeddedrespdelayvalues, 1, "embeddedrespdelayvalues",
	  "If the actual response delay is embedded within (and relative to) "
	  "an event that is not strictly a response event, this option lists "
	  "the names of the <value> elements (separated by commas) in the "
	  "(pseudo-)response events that would store the response delay.  "
	  "Only one value within each event may match this list.  This value "
	  "will be added to the default response delay (response event time "
	  "minus stimulus event time) to calculate the actual response "
	  "time/delay." },
	{ OPT_FLAGS_FULL, OPT_VAL_STR, &opt_movevalues[0], NUMMOVEVALUES, "movevalue",
	  "By default, all values are moved from matched responses to "
	  "matched stimuli.  If this option is specified one or more times, "
	  "only the values specified by instances of this option will be "
	  "moved.  Other values will be left alone." },
	{ OPT_FLAGS_FULL, OPT_VAL_BOOL, &opt_reversemerge, NUMMOVEVALUES, "reversemerge",
	  "This option reverses the merging process -- instead of moving "
	  "response event values into matching stimulus events, it will move "
	  "the matching stimulus event's values into the response event.  "
	  "The response delay value (if --respdelayname is specified) is "
	  "also put into the response event.  "
	  "Make sure this is what you really want to do!"
	}
    };

    memset(opt_movevalues, '\0', sizeof(char *)*(NUMMOVEVALUES+1));

    argc -= opt_parse(argc, argv, numopts, &opts[0], 0);
    if (opt_optsfromfile) {
	opt_parsefile(opt_optsfromfile, numopts, &opts[0], 0);
    }

    if (opt_version) {
	fprintf(stdout, "%s\n", XMLH_VERSIONSTR);
	exit(0);
    }
    if (opt_stimquery == NULL || opt_respquery == NULL) {
	fprintf(stderr, "Stimulus and/or response queries are missing.\nUse the --help option for more help.\n");
    }

    if (opt_movedefault &&
	strcmp(opt_movedefault, "move") != 0 &&
	strcmp(opt_movedefault, "copy") != 0 &&
	strcmp(opt_movedefault, "erase") != 0 &&
	strcmp(opt_movedefault, "keep") != 0) {
	fprintf(stderr, "--movedefault type '%s' not recognized (must be 'move', 'copy', 'erase', or 'keep').\n", opt_movedefault);
    }
    movedefaultchar = opt_movedefault[0];
    
    if (argc < 3) {
	fprintf(stderr, "Usage: %s [opts] inputeventfiles... outputeventfile\n", argv[0]);
	fprintf(stderr, "Not enough arguments.  Use the --help option for more help.\n");
	goto FAIL;
    }

    if (strcasecmp(opt_querylang, "XPath") != 0 &&
	strcasecmp(opt_querylang, "event") != 0) {
	fprintf(stderr, "Query language %s not recognized!\n", opt_querylang);
	goto FAIL;
    }

    {
	char ** queryplist[4] = { &opt_stimquery, &opt_stimfilterquery,
				  &opt_respquery, &opt_respfilterquery };
	char ** xpqueryplist[4] = { &xpstimquery, &xpstimfilterquery,
				    &xprespquery, &xprespfilterquery };
	int tmpind = 0;
	for (tmpind = 0; tmpind < 4; tmpind++) {
	    char * newquery = NULL;
	    char ** curqueryp = queryplist[tmpind];
	    if (*curqueryp == NULL || **curqueryp == '\0') {
		/* empty queryfilters match everything */
		newquery = strdup("true()");
	    } else if (strcasecmp(opt_querylang, "event") == 0 &&
		       (newquery = query2xpath(*curqueryp)) == NULL) {
		fprintf(stderr, "Bad query '%s'!\n", *curqueryp);
		goto FAIL;
	    }
	    if (newquery == NULL) {
		newquery = strdup(*curqueryp);
	    } else {
		fprintf(stderr, "query '%s' converted to XPath '%s'\n", *curqueryp, newquery);
	    }
	    *(xpqueryplist[tmpind]) = newquery;
	}
    }
    if (opt_embeddedrespdelayvalues) {
	char * curptr = opt_embeddedrespdelayvalues;
	char * delim = NULL;
	const char * prefix = "value[@name='";
	const char * infix = "'] | value[@name='";
	const char * suffix = "']";
	size_t prefixlen = strlen(prefix);
	size_t infixlen = strlen(infix);
	size_t suffixlen = strlen(suffix);
	size_t querylen = prefixlen;
	xprespdelayquery = strdup(prefix);
	while ((delim = strchr(curptr, ',')) != NULL) {
	    size_t namelen = (delim - curptr);
	    xprespdelayquery = (char *)realloc(xprespdelayquery, sizeof(char)*(querylen + namelen + infixlen + 1));
	    strncpy(xprespdelayquery + querylen, curptr, (delim - curptr));
	    querylen += (delim - curptr);
	    strncpy(xprespdelayquery + querylen, infix, infixlen);
	    querylen += infixlen;
	    xprespdelayquery[querylen] = '\0';
	    curptr = delim + 1;
	}
	size_t namelen = strlen(curptr);
	xprespdelayquery = (char *)realloc(xprespdelayquery, sizeof(char)*(querylen + namelen + suffixlen + 1));
	strncpy(xprespdelayquery + querylen, curptr, namelen);
	querylen += namelen;
	strncpy(xprespdelayquery + querylen, suffix, suffixlen);
	querylen += suffixlen;
	xprespdelayquery[querylen] = '\0';
    }

    outputfile = argv[argc-1];
    if (!opt_overwrite) {
	if (stat(outputfile, &statbuf) == 0) {
	    fprintf(stderr, "%s: output file '%s' exists.\nRemove file or use --overwrite.\n", argv[0], outputfile);
	    return -1;
	}
    }

    for (argind = 1; argind < argc - 1; argind++) {
	eventfilelist.push_back(std::string(argv[argind]));
    }
    if ((sortdoc = get_event_list(eventfilelist,
				  xpstimquery, xpstimfilterquery,
				  xprespquery, xprespfilterquery,
				  stimlist, resplist,
				  stderr)) == NULL) {
	fprintf(stderr, "%s: Error getting event list!\n", argv[0]);
	goto FAIL;
    }

    {
	std::multimap<double, xmlNodePtr>::iterator respiter;
	xmlXPathContextPtr xpctxt = NULL;
	if (xprespdelayquery && ((xpctxt = xmlXPathNewContext(sortdoc)) == NULL)) {
	    fprintf(stderr,"Error: unable to create new XPath context\n");
	    goto FAIL;
	}
	for (respiter = resplist.begin(); respiter != resplist.end(); respiter++) {
	    double resptime = (*respiter).first;
	    xmlNodePtr respnode = (*respiter).second;
	    if (xprespdelayquery) {
		xmlXPathObjectPtr xpobj = NULL;
		xpctxt->node = respnode;
		if ((xpobj = xmlXPathEvalExpression((xmlChar *)xprespdelayquery, xpctxt)) == NULL) {
		    fprintf(stderr, "Error: unable to evaluate xpath expression '%s'.\n", xprespdelayquery);
		    goto FAIL;
		}
		if (xpobj->type != XPATH_NODESET) {
		    fprintf(stderr, "Error: xpath expression '%s' does not evaluate to a nodeset in the following event:\n", xprespdelayquery);
		    xmlElemDump(stderr, sortdoc, respnode);
		    fprintf(stderr, "\n");
		    goto FAIL;
		}
		if (xpobj->nodesetval->nodeNr != 1) {
		    fprintf(stderr, "Error: xpath expression '%s' does not match exactly one node in the following event:\n", xprespdelayquery);
		    xmlElemDump(stderr, sortdoc, respnode);
		    fprintf(stderr, "\n");
		    goto FAIL;
		}
		double factor = 1;
		xmlChar * units = NULL;
		if ((units = xmlGetProp(xpobj->nodesetval->nodeTab[0], (xmlChar *)"units")) != NULL) {
		    if (xmlStrcmp(units, (xmlChar *)"msecs") == 0 || xmlStrcmp(units, (xmlChar *)"ms")) {
			factor = .001;
		    } else if (xmlStrcmp(units, (xmlChar *)"secs") == 0 || xmlStrcmp(units, (xmlChar *)"s")) {
			factor = 1;
		    } else {
			fprintf(stderr, "Error: the following value has unsupported units '%s':\n", units);
			xmlElemDump(stderr, sortdoc, xpobj->nodesetval->nodeTab[0]);
			fprintf(stderr, "\n");
		    }
		}
		double addrespdelay = xmlXPathCastToNumber(xpobj);
		resptime += addrespdelay * factor;
	    }
	    std::multimap<double, xmlNodePtr>::reverse_iterator stimriter;
	    for (stimriter = stimlist.rbegin();
		 stimriter != stimlist.rend() && (*stimriter).first > resptime;
		 stimriter++) {
#if VERBOSE
		fprintf(stderr, "Skipping stimulus %g (> %g)\n", (*stimriter).first, resptime);
#endif
	    }
	    if (stimriter != stimlist.rend()) {
		double stimtime = (*stimriter).first;
		xmlNodePtr stimnode = (*stimriter).second;
		xmlNodePtr child;
		xmlNodePtr fromnode = NULL;
		xmlNodePtr tonode = NULL;
		if (opt_maxresptime != -1 &&
		    resptime - stimtime > opt_maxresptime)
		    continue;
#if VERBOSE
		fprintf(stderr, "Matched stimulus at %g to response at %g\n",
			stimtime, resptime);
#endif
		fromnode = respnode;
		tonode = stimnode;
		if (opt_reversemerge) {
		    fromnode = stimnode;
		    tonode = respnode;
		}
		child = fromnode->children;
		while (child != NULL) {
		    xmlNodePtr next = child->next;
		    if (child->type == XML_ELEMENT_NODE &&
			strcmp((char *)child->name, "value") == 0) {
			int movethis = 1;
			if (opt_movevalues[0] != NULL) {
			    xmlAttrPtr attr = NULL;
			    movethis = 0;
			    for (attr = child->properties;
				 attr != NULL && !movethis;
				 attr = attr->next) {
				if (strcmp((char *)attr->name, "name") == 0 &&
				    attr->children &&
				    attr->children->type == XML_TEXT_NODE) {
				    char ** curvaluenameptr = NULL;
				    for (curvaluenameptr = &opt_movevalues[0];
					 *curvaluenameptr != NULL && !movethis;
					 curvaluenameptr++) {
					if (strcmp((char *)attr->children->content, *curvaluenameptr) == 0) {
					    movethis = 1;
					}
				    }
				}
			    }
			}
			/* copy node if needed */
			if ((fromnode != tonode) &&
			    (movethis ||
			     movedefaultchar == 'm' ||
			     movedefaultchar == 'c')) {
			    xmlNodePtr copy = xmlCopyNode(child, 1);
			    if (xmlAddChild(tonode, copy) == NULL) {
				fprintf(stderr, "Error copying child from resp to stim.\n");
				goto FAIL;
			    }
			    /* copy whitespace if it exists */
			    if (next &&
				next->type == XML_TEXT_NODE &&
				strspn((char *)next->content, " \f\t\r\n\v") == strlen((char *)next->content)) {
				copy = xmlCopyNode(next, 1);
				if (xmlAddChild(tonode, copy) == NULL) {
				    fprintf(stderr, "Error copying child from resp to stim.\n");
				    goto FAIL;
				}
			    }
			}
			/* erase node if needed */
			if (fromnode != tonode &&
			    (movethis ||
			     movedefaultchar == 'm' ||
			     movedefaultchar == 'e')) {
			    xmlUnlinkNode(child);
			    xmlFreeNode(child);
			}
		    }
		    child = next;
		}
		if (opt_respdelayname) {
		    xmlAttrPtr nameprop = NULL;
		    xmlNodePtr valnode = NULL;
		    static char numbuf[128];
		    sprintf(&numbuf[0], "%g", resptime - stimtime);
		    if ((valnode = xmlNewDocNode(tonode->doc, tonode->ns, (xmlChar *)"value", (xmlChar *)strdup(&numbuf[0]))) == NULL) {
			fprintf(stderr, "Error creating new resp. delay node.\n");
			goto FAIL;
		    }
		    if ((nameprop = xmlSetNsProp(valnode, tonode->ns, (xmlChar *)"name", (xmlChar *)opt_respdelayname)) == NULL) {
			fprintf(stderr, "Error adding resp. delay name attribute.\n");
			xmlFreeNode(valnode);
			goto FAIL;
		    }
		    if (xmlAddChild(tonode, valnode) == NULL) {
			fprintf(stderr, "Error adding resp. delay node.\n");
			xmlFreeNode(valnode);
			goto FAIL;
		    }
		}
	    }
	}
    }

    if (xmlSaveFile(outputfile, sortdoc) == -1) {
	fprintf(stderr, "Error saving output file!\n");
	goto FAIL;
    }

    goto EXIT;

  FAIL:
    retval = -1;

  EXIT:

    if (logfp)
	fclose(logfp);

    if (xpstimquery)
	free(xpstimquery);
    if (xprespquery)
	free(xprespquery);
    if (xpstimfilterquery)
	free(xpstimfilterquery);
    if (xprespfilterquery)
	free(xprespfilterquery);

    if (sortdoc)
	xmlFreeDoc(sortdoc);

    return retval;
}

/*
 * $Log: In-line log eliminated on transition to SVN; use svn log instead. $
 * Revision 1.12  2006/08/17 15:47:35  gadde
 * Add --reversemerge option.
 *
 * Revision 1.11  2005/09/20 18:37:55  gadde
 * Updates to versioning, help and documentation, and dependency checking
 *
 * Revision 1.10  2005/09/19 16:31:56  gadde
 * Documentation and help message updates.
 *
 * Revision 1.9  2005/09/14 15:11:21  gadde
 * Some -Wall fixes.
 *
 * Revision 1.8  2005/08/29 20:37:55  gadde
 * Add ability to restrict the values that are moved from event to event.
 *
 * Revision 1.7  2005/07/25 19:33:47  gadde
 * Don't send NULL filter queries to filter_events.
 *
 * Revision 1.6  2005/07/25 19:27:42  gadde
 * Remove verbosity.
 *
 * Revision 1.5  2005/04/01 22:27:57  gadde
 * Updates to allow more than one event per timepoint.
 * Also, get rid of non-existing GSL dependency.
 *
 * Revision 1.4  2005/03/28 20:48:36  gadde
 * Win32 updates
 *
 * Revision 1.3  2005/03/03 19:10:32  gadde
 * Replace query language name 'new' with 'event'.
 *
 * Revision 1.2  2005/03/02 17:47:58  gadde
 * Print out converted query.
 *
 * Revision 1.1  2005/02/18 19:49:37  gadde
 * Add bxh_eventresp.
 *
 */
