static const char rcsid[] = "$Id: bxh_eventlib.cpp,v 1.23 2009-01-15 20:55:18 gadde Exp $";

/*
 * bxh_eventlib.cpp --
 * 
 *  Generically useful event parsing functions.
 *
 * Author: Syam Gadde (gadde@biac.duke.edu), Feb. 2005.
 */

#include "bxh_config.h"

#include "bxh_eventlib.h"

#include <math.h>
#include <ctype.h>
#include <string.h>

#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/xpath.h>
#include <libxml/xpathInternals.h>

#undef FUNC
#define FUNC "replace_onsetdur"
/*
 * replace_onsetdur --
 *
 *  Replace the "onset" and "duration" child elements of nodep with
 *  newly created elements whose text is the given value formatted with
 *  "%.15g".  Each new element is inserted right after the node that
 *  preceded the element it replaces; if there was no such element, or
 *  it had no preceding sibling, the new element is appended as the last
 *  child of nodep.
 *
 *  Returns 0 on success, -1 on failure (a message goes to stderr).
 */
static
int
replace_onsetdur(xmlNodePtr nodep, double onset, double dur)
{
    char numbuf[128];
    int retval = 0;
    int durafteronset = 0;
    xmlNodePtr onsetnode = NULL;
    xmlNodePtr onsetprev = NULL;
    xmlNodePtr durnode = NULL;
    xmlNodePtr durprev = NULL;
    xmlNodePtr child = NULL;

    /* locate the existing elements and remember their insertion anchors */
    for (child = nodep->children; child; child = child->next) {
	if (child->type == XML_ELEMENT_NODE &&
	    strcmp((char *)child->name, "onset") == 0) {
	    onsetnode = child;
	    onsetprev = child->prev;
	} else if (child->type == XML_ELEMENT_NODE &&
		   strcmp((char *)child->name, "duration") == 0) {
	    durnode = child;
	    durprev = child->prev;
	}
	if (durnode && onsetnode) {
	    break;
	}
    }

    /* If the old "duration" directly follows the old "onset", its
     * anchor (durprev) is the very node that is about to be freed.
     * Note that now, so the anchor can be redirected to the new
     * "onset" element instead of being used after the free. */
    if (onsetnode != NULL && durprev == onsetnode) {
	durafteronset = 1;
    }

    if (onsetnode) {
	xmlUnlinkNode(onsetnode);
	xmlFreeNode(onsetnode);
	onsetnode = NULL;
    }
    snprintf(numbuf, sizeof(numbuf), "%.15g", onset);
    if ((onsetnode = xmlNewDocNode(nodep->doc, nodep->ns, (xmlChar *)"onset", (xmlChar *)&numbuf[0])) == NULL) {
	fprintf(stderr, "Error creating new node.\n");
	goto FAIL;
    }
    if (onsetprev) {
	if (xmlAddNextSibling(onsetprev, onsetnode) == NULL) {
	    fprintf(stderr, "Error adding sibling.\n");
	    xmlFreeNode(onsetnode); /* never linked, so still ours */
	    goto FAIL;
	}
    } else {
	if (xmlAddChild(nodep, onsetnode) == NULL) {
	    fprintf(stderr, "Error adding child.\n");
	    xmlFreeNode(onsetnode); /* never linked, so still ours */
	    goto FAIL;
	}
    }
    if (durafteronset) {
	durprev = onsetnode;
    }

    if (durnode) {
	xmlUnlinkNode(durnode);
	xmlFreeNode(durnode);
	durnode = NULL;
    }
    snprintf(numbuf, sizeof(numbuf), "%.15g", dur);
    if ((durnode = xmlNewDocNode(nodep->doc, nodep->ns, (xmlChar *)"duration", (xmlChar *)&numbuf[0])) == NULL) {
	fprintf(stderr, "Error creating new node.\n");
	goto FAIL;
    }
    if (durprev) {
	if (xmlAddNextSibling(durprev, durnode) == NULL) {
	    fprintf(stderr, "Error adding sibling.\n");
	    xmlFreeNode(durnode); /* never linked, so still ours */
	    goto FAIL;
	}
    } else {
	if (xmlAddChild(nodep, durnode) == NULL) {
	    fprintf(stderr, "Error adding child.\n");
	    xmlFreeNode(durnode); /* never linked, so still ours */
	    goto FAIL;
	}
    }
    goto EXIT;

  FAIL:
    retval = -1;

  EXIT:
    return retval;
}

#undef FUNC
#define FUNC "get_onsetdur"
/*
 * get_onsetdur --
 *
 *  Read the numeric content of the "onset" and "duration" child
 *  elements of nodep.  A missing "duration" element yields a duration
 *  of 0; a missing "onset" element is an error.  The whole text of each
 *  element must be consumed by strtod().
 *
 *  On success stores the values in *onsetp and *durp and returns 0.
 *  On failure writes a message to stderr, leaves *onsetp and *durp
 *  untouched, and returns -1.
 */
static
int
get_onsetdur(xmlNodePtr nodep, double * onsetp, double * durp)
{
    int retval = 0;
    xmlNodePtr durnode = NULL;
    xmlNodePtr onsetnode = NULL;
    xmlNodePtr child = NULL;
    char * content = NULL;
    char * endptr = NULL;
    double onset = 0;
    double dur = 0;

    for (child = nodep->children; child; child = child->next) {
	if (child->type == XML_ELEMENT_NODE &&
	    strcmp((char *)child->name, "onset") == 0) {
	    onsetnode = child;
	} else if (child->type == XML_ELEMENT_NODE &&
		   strcmp((char *)child->name, "duration") == 0) {
	    durnode = child;
	}
	if (durnode && onsetnode) {
	    break;
	}
    }

    /* without this check a NULL content pointer would reach strtod() */
    if (onsetnode == NULL) {
	fprintf(stderr,"Error: event has no \"onset\" element.\n");
	goto FAIL;
    }
    content = (char *)xmlNodeGetContent(onsetnode);
    if (content == NULL) {
	fprintf(stderr,"Error: unable to read \"onset\" value.\n");
	goto FAIL;
    }
    onset = strtod((char *)content, &endptr);
    if (*endptr != '\0') {
	fprintf(stderr,"Error: bad \"onset\" value %s.\n", (char *)content);
	goto FAIL;
    }
    xmlFree(content); content = NULL;

    if (durnode == NULL) {
	dur = 0;
    } else {
	content = (char *)xmlNodeGetContent(durnode);
	if (content == NULL) {
	    fprintf(stderr,"Error: unable to read \"duration\" value.\n");
	    goto FAIL;
	}
	dur = strtod((char *)content, &endptr);
	if (*endptr != '\0') {
	    fprintf(stderr,"Error: bad \"duration\" value %s.\n", (char *)content);
	    goto FAIL;
	}
	xmlFree(content); content = NULL;
    }

    *onsetp = onset;
    *durp = dur;

    goto EXIT;

  FAIL:
    retval = -1;

  EXIT:
    /* content is still set only when a FAIL path was taken */
    if (content)
	xmlFree(content);
    return retval;
}

#undef FUNC
#define FUNC "merge_event_lists"
/*
 * merge_event_lists --
 *
 *  Create a new XML document with a single "events" root element and
 *  fill it with deep copies of every node matched by alleventsxpath in
 *  each document of docs (documents are visited in vector order).
 *
 *  A progress line is written to logfp when it is non-NULL, and also
 *  to stderr when logfp is some stream other than stderr.
 *
 *  Returns the new document, or NULL on error (the partially built
 *  document is freed in that case).
 */
xmlDocPtr
merge_event_lists(std::vector<xmlDocPtr> & docs, const char * alleventsxpath, FILE * logfp)
{
    xmlDocPtr merged = NULL;
    xmlNodePtr root = NULL;
    xmlXPathContextPtr ctx = NULL;
    xmlXPathObjectPtr found = NULL;
    FILE * echo = (logfp != NULL && logfp != stderr) ? stderr : NULL;
    int failed = 1;
    size_t docidx;

    if (echo)
	fprintf(echo, "Merging event lists...\n");
    if (logfp)
	fprintf(logfp,  "Merging event lists...\n");

    merged = xmlNewDoc((xmlChar *)"1.0");
    if (merged == NULL) {
	fprintf(stderr,"Error: unable to create new XML document\n");
	goto CLEANUP;
    }

    root = xmlNewChild((xmlNodePtr)merged, NULL, (xmlChar *)"events", NULL);
    if (root == NULL) {
	fprintf(stderr,"Error: unable to create XML element 'events'\n");
	goto CLEANUP;
    }

    /* copy every matching node of every input document into the new one */
    for (docidx = 0; docidx < docs.size(); docidx++) {
	int total;
	int i;

	ctx = xmlXPathNewContext(docs[docidx]);
	if (ctx == NULL) {
	    fprintf(stderr,"Error: unable to create new XPath context\n");
	    goto CLEANUP;
	}
	found = xmlXPathEvalExpression((xmlChar *)alleventsxpath, ctx);
	if (found == NULL) {
	    fprintf(stderr,"Error: unable to evaluate xpath expression '%s'.\n", alleventsxpath);
	    goto CLEANUP;
	}

	total = (found->nodesetval != NULL) ? found->nodesetval->nodeNr : 0;

	for (i = 0; i < total; i++) {
	    xmlNodePtr dup = xmlDocCopyNode(found->nodesetval->nodeTab[i], merged, 1);
	    if (dup == NULL) {
		fprintf(stderr, "Error copying node.\n");
		goto CLEANUP;
	    }
	    if (xmlAddChild(root, dup) == NULL) {
		fprintf(stderr, "Error adding child.\n");
		goto CLEANUP;
	    }
#if VERBOSE
	    double onset;
	    double duration;
	    if (get_onsetdur(dup, &onset, &duration) != 0) {
		fprintf(stderr, "Error getting onset and duration!\n");
		goto CLEANUP;
	    }
	    fprintf(stderr, "[%g, %g)\n", onset, onset+duration);
#endif
	}

	xmlXPathFreeObject(found); found = NULL;
	xmlXPathFreeContext(ctx); ctx = NULL;
    }

    /* every matching node now has a copy in the new document */
    failed = 0;

  CLEANUP:
    if (failed) {
	if (merged)
	    xmlFreeDoc(merged);
	merged = NULL;
    }
    if (found)
	xmlXPathFreeObject(found);
    if (ctx)
	xmlXPathFreeContext(ctx);

    return merged;
}

#undef FUNC
#define FUNC "sort_event_list"
/*
 * sort_event_list --
 *
 *  Reorder, in place, the nodes of doc matched by alleventsxpath.
 *  Nodes are keyed by (onset, duration) in a multimap ordered by
 *  se_less, then each node is unlinked and re-appended, preceded by a
 *  newline text node, to the parent of the first node in sorted order.
 *
 *  A progress line is written to logfp when it is non-NULL, and also
 *  to stderr when logfp is some stream other than stderr.
 *
 *  Returns 0 on success, -1 on error.
 */
int
sort_event_list(xmlDocPtr doc, const char * alleventsxpath, FILE * logfp)
{
    typedef std::multimap<se_key_t, se_data_t, se_less> eventmap_t;

    int status = -1;
    xmlXPathContextPtr ctx = NULL;
    xmlXPathObjectPtr found = NULL;
    FILE * echo = (logfp != NULL && logfp != stderr) ? stderr : NULL;
    int total = 0;
    int i;
    eventmap_t ordered;
    eventmap_t::iterator it;

    if (echo)
	fprintf(echo, "Sorting event list...\n");
    if (logfp)
	fprintf(logfp,  "Sorting event list...\n");

    /* collect the matching nodes, keyed by (onset, duration) */
    ctx = xmlXPathNewContext(doc);
    if (ctx == NULL) {
	fprintf(stderr,"Error: unable to create new XPath context\n");
	goto CLEANUP;
    }
    found = xmlXPathEvalExpression((xmlChar *)alleventsxpath, ctx);
    if (found == NULL) {
	fprintf(stderr,"Error: unable to evaluate xpath expression '%s'.\n", alleventsxpath);
	goto CLEANUP;
    }

    if (found->nodesetval)
	total = found->nodesetval->nodeNr;

    for (i = 0; i < total; i++) {
	double onset;
	double duration;
	if (get_onsetdur(found->nodesetval->nodeTab[i], &onset, &duration) != 0) {
	    fprintf(stderr, "Error getting onset and duration!\n");
	    goto CLEANUP;
	}
	ordered.insert(se_val_t(se_key_t(onset, duration), found->nodesetval->nodeTab[i]));
    }

    /* put the nodes back in sorted order, all under the parent of the
     * first sorted node, each one preceded by a newline text node */
    if (!ordered.empty()) {
	xmlNodePtr parent = ordered.begin()->second->parent;
	for (it = ordered.begin(); it != ordered.end(); ++it) {
	    xmlNodePtr newline = NULL;
	    xmlUnlinkNode(it->second);
	    newline = xmlNewDocText(doc, (xmlChar *)"\n");
	    if (newline == NULL) {
		fprintf(stderr, "Error creating new text node.\n");
		goto CLEANUP;
	    }
	    if (xmlAddChild(parent, newline) == NULL) {
		fprintf(stderr, "Error adding text child.\n");
		goto CLEANUP;
	    }
	    if (xmlAddChild(parent, it->second) == NULL) {
		fprintf(stderr, "Error adding child element.\n");
		goto CLEANUP;
	    }
#if VERBOSE
	    double onset;
	    double duration;
	    if (get_onsetdur(it->second, &onset, &duration) != 0) {
		fprintf(stderr, "Error getting onset and duration!\n");
		goto CLEANUP;
	    }
	    fprintf(stderr, "[%g, %g)\n", onset, onset+duration);
#endif
	}
    }

    status = 0;

  CLEANUP:
    if (found)
	xmlXPathFreeObject(found);
    if (ctx)
	xmlXPathFreeContext(ctx);

    return status;
}

#undef FUNC
#define FUNC "canonicalize_event_list"
/*
 * canonicalize_event_list --
 *
 *  Rewrite, in place, the events of doc matched by alleventsxpath so
 *  that the resulting events cover unique, non-overlapping intervals
 *  [onset, onset+duration).  Events with equal intervals are merged
 *  (their "value" children are combined); overlapping events are split
 *  into fragments at the overlap boundaries.  Negative durations are
 *  treated as zero.  Finally all resulting event nodes are re-appended,
 *  in sorted order, to the parent of the first event.
 *
 *  Progress/statistics are written to logfp when it is non-NULL, and
 *  also to stderr when logfp is some stream other than stderr.
 *
 *  Returns 0 on success, -1 on error (doc may then be partially
 *  modified).
 */
int
canonicalize_event_list(xmlDocPtr doc, const char * alleventsxpath, FILE * logfp)
{
    int retval = 0;
    xmlXPathContextPtr xpctxt = NULL;
    xmlXPathObjectPtr xpobj = NULL;
    int numevents;
    int eventnum;
    xmlNodePtr newnode = NULL;
    /* tolerance when comparing onsets/durations for equality
     * (original note: "nanosecond granularity") */
    double epsilon = 0.0000001;
    FILE * logerr = NULL;

    std::multimap<se_key_t, se_data_t, se_less> sortedevents;
    std::multimap<se_key_t, se_data_t, se_less>::iterator seiter;

    if (logfp != NULL && logfp != stderr)
	logerr = stderr;

    if (logerr)
	fprintf(logerr, "Canonicalizing event list...\n");
    if (logfp)
	fprintf(logfp,  "Canonicalizing event list...\n");

    if ((xpctxt = xmlXPathNewContext(doc)) == NULL) {
	fprintf(stderr,"Error: unable to create new XPath context\n");
	goto FAIL;
    }
    if ((xpobj = xmlXPathEvalExpression((xmlChar *)alleventsxpath, xpctxt)) == NULL) {
	fprintf(stderr,"Error: unable to evaluate xpath expression '%s'.\n", alleventsxpath);
	goto FAIL;
    }

    numevents = 0;
    if (xpobj->nodesetval) {
	numevents = xpobj->nodesetval->nodeNr;
    }

    for (eventnum = 0; eventnum < numevents; eventnum++) {
	double onset;
	double duration;
	if (get_onsetdur(xpobj->nodesetval->nodeTab[eventnum], &onset, &duration) != 0) {
	    fprintf(stderr, "Error getting onset and duration!\n");
	    goto FAIL;
	}
	if (duration < 0) {
	    if (logerr)
		fprintf(logerr, "Warning: found negative duration %g.  Setting it to zero.\n", duration);
	    if (logfp)
		fprintf(logfp, "Warning: found negative duration %g.  Making believe it is zero.\n", duration);
	    duration = 0;
	}
	sortedevents.insert(se_val_t(se_key_t(onset, duration), xpobj->nodesetval->nodeTab[eventnum]));
    }

    xmlXPathFreeContext(xpctxt); xpctxt = NULL;
    xmlXPathFreeObject(xpobj); xpobj = NULL;
    /* now all matching nodes are referenced in sortedevents */

    if (logerr)
	fprintf(logerr, " old number of intervals: %d\n", (int)sortedevents.size());
    if (logfp)
	fprintf(logfp,  " old number of intervals: %d\n", (int)sortedevents.size());

    /* now go through sorted nodes, and collapse/expand them to
     * create new "events" with unique non-overlapping time intervals.
     * multimap ensures that new nodes in sortedevents are always
     * to be sorted by onset and duration.
     */
    for (seiter = sortedevents.begin();
	 seiter != sortedevents.end();
	 /* null -- see increment below */) {
	se_val_t curval = *seiter;
	double onsetA = curval.first.first;
	double durA = curval.first.second;
	xmlNodePtr nodeA = curval.second;
	seiter++; /* increment here! */
	if (seiter == sortedevents.end()) {
	    break; /* we're done */
	}
	se_val_t nextval = *seiter;
	double onsetB = nextval.first.first;
	double durB = nextval.first.second;
	xmlNodePtr nodeB = nextval.second;
#if VERBOSE
	fprintf(stderr, "A [%f, %f)\n", onsetA, onsetA + durA);
	fprintf(stderr, "B [%f, %f)\n", onsetB, onsetB + durB);
#endif
	if (onsetA != onsetB && onsetA + durA <= onsetB) {
#if VERBOSE
	    fprintf(stderr, "\\case 1: no overlap.\n");
#endif
	    continue; /* no interval overlap */
	}
	if (fabs(onsetA - onsetB) < epsilon &&
	    fabs(durA - durB) < epsilon) {
	    /* intervals A and B are equal */
	    /* need to merge B into A and delete B */
#if VERBOSE
	    fprintf(stderr, "\\case 2: merge B into A, delete B.\n");
#endif
	    std::multimap<se_key_t, se_data_t, se_less>::iterator seiter2;
	    xmlNodePtr child = nodeB->children;
	    while (child) {
		/* Save the successor first: xmlUnlinkNode() clears the
		 * moved node's sibling links.  (The walk used to resume
		 * from the previous sibling and gave up when there was
		 * none, losing any later "value" children of B.) */
		xmlNodePtr nextchild = child->next;
		if (child->type == XML_ELEMENT_NODE &&
		    strcmp((char *)child->name, "value") == 0) {
		    xmlUnlinkNode(child);
		    if (xmlAddChild(nodeA, child) == NULL) {
			fprintf(stderr, "Error moving child from B to A.\n");
			goto FAIL;
		    }
		    /* carry along whitespace-only text that followed it */
		    if (nextchild &&
			nextchild->type == XML_TEXT_NODE &&
			strspn((char *)nextchild->content, " \f\t\r\n\v") == strlen((char *)nextchild->content)) {
			xmlNodePtr textnode = nextchild;
			nextchild = textnode->next;
			xmlUnlinkNode(textnode);
			if (xmlAddChild(nodeA, textnode) == NULL) {
			    fprintf(stderr, "Error moving child from B to A.\n");
			    goto FAIL;
			}
		    }
		}
		child = nextchild;
	    }
	    /* reset seiter to A, but save B position for deleting */
	    seiter2 = seiter;
	    seiter--;
	    /* delete node B */
	    sortedevents.erase(seiter2);
	    xmlUnlinkNode(nodeB);
	    xmlFreeNode(nodeB);
	    continue;
	}
	/* if we get here, A and B overlap, and starting point of A
	 * is no greater than starting point of B, because they are
	 * sorted */
	if (fabs(durA) < epsilon) {
	    /* Interval A [x,x], is a single point and starts at the
	     * same time as interval B [x,y], by virtue of ordering
	     * constraints.  Merge B into A, but keep B.
	     */
#if VERBOSE
	    fprintf(stderr, "\\case 3: merge B into A, keep B.\n");
#endif
	    xmlNodePtr child;
	    for (child = nodeB->children; child; child = child->next) {
		if (child->type == XML_ELEMENT_NODE &&
		    strcmp((char *)child->name, "value") == 0) {
		    /* a failed copy (NULL) is caught by xmlAddChild below */
		    xmlNodePtr childcopy = xmlCopyNode(child, 1);
		    xmlUnlinkNode(childcopy);
		    if (xmlAddChild(nodeA, childcopy) == NULL) {
			fprintf(stderr, "Error copying child from B to A.\n");
			goto FAIL;
		    }
		    if (child->next &&
			child->next->type == XML_TEXT_NODE &&
			strspn((char *)child->next->content, " \f\t\r\n\v") == strlen((char *)child->next->content)) {
			childcopy = xmlCopyNode(child->next, 1);
			xmlUnlinkNode(childcopy);
			if (xmlAddChild(nodeA, childcopy) == NULL) {
			    fprintf(stderr, "Error moving child from B to A.\n");
			    goto FAIL;
			}
		    }
		};
	    }
	    /* iterator is now at B, which is where we want it */
	    continue;
	}
	if (onsetA < onsetB) {
	    /* Interval A [w,x) overlaps and starts before
	     * interval B [y,z):
	     *     |-----A-----|
	     *  <--w-----y-----x-----z-->
	     *           |-----B-----|
	     * or
	     *     |--------A--------|
	     *  <--w-----y-----z-----x-->
	     *           |--B--|
	     * Split interval A into two fragments C [a1,b1) and D [b1,a2):
	     *     |--C--|--D--|
	     *  <--w-----y-----x-----z-->
	     *           |-----B-----|
	     * or
	     *     |--C--|-----D-----|
	     *  <--w-----y-----z-----x-->
	     *           |--B--|
	     */
#if VERBOSE
	    fprintf(stderr, "\\case 4: split A into two fragments:\n");
	    fprintf(stderr, " | C [%f, %f)\n", onsetA, onsetB);
	    fprintf(stderr, " | D [%f, %f)\n", onsetB, onsetA + durA);
#endif
	    if ((newnode = xmlCopyNode(nodeA, 1)) == NULL) {
		fprintf(stderr, "Error copying node.\n");
		goto FAIL;
	    }
	    if (replace_onsetdur(nodeA, onsetA, onsetB - onsetA) != 0) {
		fprintf(stderr, "Error replacing onset/dur in interval (C).\n");
		xmlFreeNode(newnode);
		goto FAIL;
	    }
	    if (replace_onsetdur(newnode, onsetB, durA - (onsetB - onsetA)) != 0) {
		fprintf(stderr, "Error replacing onset/dur in interval (D).\n");
		xmlFreeNode(newnode);
		goto FAIL;
	    }

	    /* erase obsolete interval A (need to reset seiter below) */
	    seiter--; /* was at B, set to A */
	    sortedevents.erase(seiter);
	    /* add new intervals C and D to event list, resetting
	     * loop iterator just past interval C
	     * (C is guaranteed to not overlap any other intervals,
	     *  so no further processing is needed) */
	    seiter = sortedevents.insert(se_val_t(se_key_t(onsetA, onsetB - onsetA), nodeA)); /* insert C and set iterator */
	    sortedevents.insert(se_val_t(se_key_t(onsetB, durA - (onsetB - onsetA)), newnode)); /* insert D */
	    seiter++; /* increment past C -- just an optimization */
	    /* Note that we couldn't just set seiter to interval D,
	     * because there may be other unprocessed intervals that
	     * have the same span/key as D, and the insert() may have
	     * placed D past them. */
	    continue;
	}
	if (fabs(onsetA - onsetB) < epsilon && durA < durB) {
	    /* Interval A [x,y) starts at the same time, but
	     * ends before interval B [x,z):
	     *     |--A--|
	     *  <--x-----y-----z-->
	     *     |-----B-----|
	     * Split interval B into two fragments C [x,y) and D [y,z).
	     *     |--A--|
	     *  <--x-----y-----z-->
	     *     |--C--|--D--|
	     */
#if VERBOSE
	    fprintf(stderr, "\\case 5 (durA=%g): split B into two fragments:\n", durA);
	    fprintf(stderr, " | C [%f, %f)\n", onsetB, onsetB + durA);
	    fprintf(stderr, " | D [%f, %f)\n", onsetB + durA, onsetB + durB);
#endif
	    if ((newnode = xmlCopyNode(nodeB, 1)) == NULL) {
		fprintf(stderr, "Error copying node.\n");
		goto FAIL;
	    }
	    if (replace_onsetdur(nodeB, onsetB, durA) != 0) {
		fprintf(stderr, "Error replacing onset/dur in interval (C).\n");
		xmlFreeNode(newnode);
		goto FAIL;
	    }
	    if (replace_onsetdur(newnode, onsetB + durA, durB - durA) != 0) {
		fprintf(stderr, "Error replacing onset/dur in interval (D).\n");
		xmlFreeNode(newnode);
		goto FAIL;
	    }
	    
	    /* erase obsolete interval B (need to reset seiter below) */
	    sortedevents.erase(seiter); /* seiter is already at B */
	    /* add new intervals C and D to event list, resetting
	     * loop iterator to A or C (whichever is earlier in multimap)
	     */
	    sortedevents.insert(se_val_t(se_key_t(onsetB, durA), nodeB)); /* insert C */
	    sortedevents.insert(se_val_t(se_key_t(onsetB + durA, durB - durA), newnode)); /* insert D */
	    seiter = sortedevents.lower_bound(se_key_t(onsetB, durA));
	    /* check for end() before dereferencing the iterator */
	    while (seiter != sortedevents.end() &&
		   (*seiter).second != nodeA && (*seiter).second != nodeB) {
		seiter++;
	    }
	    if (seiter == sortedevents.end()) {
		/* should have stopped at nodeA or nodeB (now interval C)! */
		fprintf(stderr, "Internal error: couldn't find interval A or C!\n");
		goto FAIL;
	    }
	    continue;
	}
	fprintf(stderr, "Internal error: events are not sorted correctly?\n");
	goto FAIL;
    }

    if (logerr)
	fprintf(logerr, " new number of intervals: %d\n", (int)sortedevents.size());
    if (logfp)
	fprintf(logfp,  " new number of intervals: %d\n", (int)sortedevents.size());

    /* re-insert nodes into document in sorted order */
    /* do this by re-inserting each node as child of parent of first node */
    if (sortedevents.size() != 0) {
	xmlNodePtr parent = (*sortedevents.begin()).second->parent;
	for (seiter = sortedevents.begin();
	     seiter != sortedevents.end();
	     seiter++) {
	    xmlUnlinkNode((*seiter).second);
	    if (xmlAddChild(parent, (*seiter).second) == NULL) {
		fprintf(stderr, "Error adding child element.\n");
		goto FAIL;
	    }
	}
    }

    goto EXIT;
    
  FAIL:
    retval = -1;

  EXIT:
    if (xpobj)
	xmlXPathFreeObject(xpobj);
    if (xpctxt)
	xmlXPathFreeContext(xpctxt);
	
    return retval;
}

#undef FUNC
#define FUNC "match_events"
/*
 * match_events --
 *
 *  For each of the numqueries XPath predicates in queries, find the
 *  events in doc matching
 *    //*[local-name()='events']/*[local-name()='event'][PREDICATE]
 *  and record them in eventlist, which maps an event onset to a list of
 *  entries, one per query index, each holding the matched nodes at that
 *  onset.  Existing entries are extended; duplicates are not added.
 *
 *  Per-query match counts and onsets are written to logfp when it is
 *  non-NULL; the match count is also written to stderr when logfp is
 *  some stream other than stderr.
 *
 *  Returns 0 on success, -1 on error.
 */
int
match_events(xmlDocPtr doc, int numqueries, char ** queries, char ** querylabels, std::map<double, qvec > & eventlist, FILE * logfp)
{
    int retval = 0;
    const char * queryprefix = "//*[local-name()='events']/*[local-name()='event']";
    char * fullquery = NULL;
    int indquery = 0;
    xmlXPathContextPtr xpctxt = NULL;
    xmlXPathObjectPtr xpobj = NULL;

    FILE * logerr = NULL;

    /* echo summaries to stderr when logging goes elsewhere (this used
     * to overwrite logfp itself, so the log file never got anything) */
    if (logfp != NULL && logfp != stderr)
	logerr = stderr;

    for (indquery = 0; indquery < numqueries; indquery++) {
	size_t prefixlen = 0;
	size_t predicatelen = 0;
	int nummatches = 0;
	int indmatch = 0;
	
	/* build "<prefix>[<predicate>]" */
	prefixlen = strlen(queryprefix);
	predicatelen = strlen(queries[indquery]);
	fullquery = (char *)malloc(sizeof(char)*(prefixlen + 1 + predicatelen + 1 + 1));
	if (fullquery == NULL) {
	    fprintf(stderr,"Error: unable to allocate memory for query\n");
	    goto FAIL;
	}
	strcpy(fullquery, queryprefix);
	strcpy(fullquery + prefixlen, "[");
	strcpy(fullquery + prefixlen + 1, queries[indquery]);
	strcpy(fullquery + prefixlen + 1 + predicatelen, "]");
	
	if ((xpctxt = xmlXPathNewContext(doc)) == NULL) {
	    fprintf(stderr,"Error: unable to create new XPath context\n");
	    goto FAIL;
	}

	if ((xpobj = xmlXPathEvalExpression((xmlChar *)fullquery, xpctxt)) == NULL) {
	    fprintf(stderr,"Error: unable to evaluate xpath expression '%s'.\n", fullquery);
	    goto FAIL;
	}

	nummatches = 0;
	if (xpobj->nodesetval) {
	    nummatches = xpobj->nodesetval->nodeNr;
	}

	if (logerr)
	    fprintf(logerr, "Query '%s' matched %d events.\n", querylabels[indquery], nummatches);
	if (logfp)
	    fprintf(logfp,  "Query '%s' matched %d events.\n", querylabels[indquery], nummatches);
	if (logfp)
	    fprintf(logfp,  " Onsets:");

	for (indmatch = 0; indmatch < nummatches; indmatch++) {
	    double onset;
	    double dur;
	    if (get_onsetdur(xpobj->nodesetval->nodeTab[indmatch], &onset, &dur) != 0) {
		fprintf(stderr, "Error getting onset and duration!\n");
		goto FAIL;
	    }
	    {
		size_t qvind;
		qvec & qlist = eventlist[onset];
		size_t qsize = qlist.size();
		size_t numqnodes;
		size_t qnodenum;
		xmlNodePtr matchednode = xpobj->nodesetval->nodeTab[indmatch];
		/* find this query's entry at this onset, if any */
		for (qvind = 0; qvind < qsize; qvind++) {
		    if (qlist[qvind].qind == indquery)
			break;
		}
		if (qvind == qsize) {
		    /* query index not there already; need to add it to list */
		    qlist.push_back(qent_t());
		    qlist[qsize].qind = indquery;
		    qvind = qsize;
		}
		numqnodes = qlist[qvind].qnodes.size();
		for (qnodenum = 0; qnodenum < numqnodes; qnodenum++) {
		    if (qlist[qvind].qnodes[qnodenum] == matchednode)
			break;
		}
		if (qnodenum == numqnodes) {
		    /* matched node not there already; need to add it to list */
		    qlist[qvind].qnodes.push_back(matchednode);
		}
	    }
	    if (logfp)
		fprintf(logfp, " %g", onset);
	}
	if (logfp)
	    fprintf(logfp, "\n");

	xmlXPathFreeObject(xpobj); xpobj = NULL;
	xmlXPathFreeContext(xpctxt); xpctxt = NULL;
	free(fullquery); fullquery = NULL;
    }

    goto EXIT;
    
  FAIL:
    retval = -1;

  EXIT:
    if (xpctxt)
	xmlXPathFreeContext(xpctxt);
    if (xpobj)
	xmlXPathFreeObject(xpobj);
    if (fullquery)
	free(fullquery);
    return retval;
}

#undef FUNC
#define FUNC "filter_events"
/*
 * filter_events --
 *
 *  For each query index with a non-NULL entry in queryfilters, find
 *  the "filter" events in doc matching
 *    //*[local-name()='events']/*[local-name()='event'][FILTER]
 *  and keep only those entries of eventlist (for that query index)
 *  whose onset equals a filter event's onset or lies inside its
 *  interval (onset, onset+dur).  Kept entries are moved to a new list
 *  which replaces eventlist at the end.
 *
 *  NOTE(review): entries belonging to a query whose filter is NULL are
 *  never moved to the new list, so they are dropped when eventlist is
 *  replaced -- confirm that callers expect this.
 *
 *  The onsets being excluded are written to logfp when it is non-NULL;
 *  a per-query count is written to stderr when logfp is some stream
 *  other than stderr.
 *
 *  Returns 0 on success, -1 on error (eventlist may then be partially
 *  modified).
 */
int
filter_events(xmlDocPtr doc, int numqueries, char ** queryfilters, char ** querylabels, std::map<double, qvec > & eventlist, FILE * logfp)
{
    int retval = 0;
    const char * queryprefix = "//*[local-name()='events']/*[local-name()='event']";
    char * fullquery = NULL;
    int indquery = 0;
    xmlXPathContextPtr xpctxt = NULL;
    xmlXPathObjectPtr xpobj = NULL;
    std::map<double, qvec> neweventlist;
    FILE * logerr = NULL;

    /* echo summaries to stderr when logging goes elsewhere (this used
     * to overwrite logfp itself, so the log file never got anything) */
    if (logfp != NULL && logfp != stderr)
	logerr = stderr;

    for (indquery = 0; indquery < numqueries; indquery++) {
	size_t prefixlen = 0;
	size_t predicatelen = 0;
	int nummatches = 0;
	int indmatch = 0;

	if (queryfilters[indquery] == NULL)
	    continue; /* note: this means no events moved to neweventlist! */
	
	/* build "<prefix>[<filter>]" */
	prefixlen = strlen(queryprefix);
	predicatelen = strlen(queryfilters[indquery]);
	fullquery = (char *)malloc(sizeof(char)*(prefixlen + 1 + predicatelen + 1 + 1));
	if (fullquery == NULL) {
	    fprintf(stderr,"Error: unable to allocate memory for query\n");
	    goto FAIL;
	}
	strcpy(fullquery, queryprefix);
	strcpy(fullquery + prefixlen, "[");
	strcpy(fullquery + prefixlen + 1, queryfilters[indquery]);
	strcpy(fullquery + prefixlen + 1 + predicatelen, "]");
	
	if ((xpctxt = xmlXPathNewContext(doc)) == NULL) {
	    fprintf(stderr,"Error: unable to create new XPath context\n");
	    goto FAIL;
	}

	if ((xpobj = xmlXPathEvalExpression((xmlChar *)fullquery, xpctxt)) == NULL) {
	    fprintf(stderr,"Error: unable to evaluate xpath expression '%s'.\n", fullquery);
	    goto FAIL;
	}

	nummatches = 0;
	if (xpobj->nodesetval) {
	    nummatches = xpobj->nodesetval->nodeNr;
	}

	for (indmatch = 0; indmatch < nummatches; indmatch++) {
	    double onset;
	    double dur;
	    if (get_onsetdur(xpobj->nodesetval->nodeTab[indmatch], &onset, &dur) != 0) {
		fprintf(stderr, "Error getting onset and duration!\n");
		goto FAIL;
	    }

	    /* find any events in eventlist whose onset is contained within
	     * filter event [onset, onset+dur), i.e. such that:
	     *   evonset < onset + dur
	     *   onset < evonset
	     * or:
	     *   evonset == onset
	     */
	    {
		std::map<double, qvec>::iterator eiter;
		std::map<double, qvec>::iterator eiter2;
		size_t qvind;
		eiter = eventlist.lower_bound(onset);
		while (eiter != eventlist.end() &&
		       ((*eiter).first == onset ||
			(onset < (*eiter).first &&
			 (*eiter).first < onset + dur))) {
		    /* need to move this to new event list */
		    qvec & qlist = (*eiter).second;
		    size_t qsize = qlist.size();
		    std::vector<xmlNodePtr> movednodes;
		    int foundit = 0;
		    for (qvind = 0; qvind < qsize; qvind++) {
			if (qlist[qvind].qind == indquery) {
			    /* remove by overwriting with the last entry */
			    movednodes = qlist[qvind].qnodes;
			    qlist[qvind] = qlist[qlist.size()-1];
			    qlist.resize(qlist.size()-1);
			    foundit = 1;
			    break;
			}
		    }
		    if (foundit) {
			/* we found the query, need to move it */
			qvec & newqlist = neweventlist[(*eiter).first];
			size_t newqsize = newqlist.size();
			for (qvind = 0; qvind < newqsize; qvind++) {
			    if (newqlist[qvind].qind == indquery)
				break;
			}
			if (qvind == newqsize) {
			    newqlist.push_back(qent_t());
			    newqlist[newqsize].qind = indquery;
			}
			/* qvind is the entry for this query, whether it
			 * was found or just appended (indexing with
			 * newqsize would be out of bounds if found) */
			std::vector<xmlNodePtr> & newnodes =
			    newqlist[qvind].qnodes;
			size_t nummovednodes = movednodes.size();
			size_t numnewnodes = newnodes.size();
			size_t movednodenum;
			size_t newnodenum;
			for (movednodenum = 0; movednodenum < nummovednodes; movednodenum++) {
			    for (newnodenum = 0; newnodenum < numnewnodes; newnodenum++) {
				if (movednodes[movednodenum] == newnodes[newnodenum])
				    break;
			    }
			    if (newnodenum == numnewnodes) {
				newnodes.push_back(movednodes[movednodenum]);
			    }
			}
		    }
		    eiter2 = eiter;
		    eiter++;
		    /* test the current size; qsize was taken before the
		     * entry above was removed */
		    if (qlist.empty()) {
			eventlist.erase(eiter2);
		    }
		}
	    }
	}

	{
	    size_t numfilteredout = 0;
	    std::map<double, qvec>::iterator eiter;
	    size_t qvind;
	    if (logfp)
		fprintf(logfp, "Event filter query for '%s' will exclude events at the following onsets:", querylabels[indquery]);
	    /* whatever is left in eventlist for this query was not kept */
	    for (eiter = eventlist.begin();
		 eiter != eventlist.end();
		 eiter++) {
		qvec & qlist = (*eiter).second;
		size_t qsize = qlist.size();
		for (qvind = 0; qvind < qsize; qvind++) {
		    if (qlist[qvind].qind == indquery) {
			numfilteredout++;
			if (logfp)
			    fprintf(logfp, " %g", (*eiter).first);
			break;
		    }
		}
	    }
	    if (logfp)
		fprintf(logfp, "\n");
	    if (logerr) fprintf(logerr, "Event filter query for '%s' excluded %u events.\n", querylabels[indquery], (unsigned int)numfilteredout);
	}

	xmlXPathFreeObject(xpobj); xpobj = NULL;
	xmlXPathFreeContext(xpctxt); xpctxt = NULL;
	free(fullquery); fullquery = NULL;
    }

    eventlist = neweventlist;

    /* remove events that no longer correspond to any queries */
    {
	std::map<double, qvec>::iterator eiter;
	std::map<double, qvec>::iterator eiter2;
	eiter = eventlist.begin();
	while (eiter != eventlist.end()) {
	    if ((*eiter).second.size() == 0) {
		eiter2 = eiter;
		eiter++;
		eventlist.erase(eiter2);
	    } else {
		eiter++;
	    }
	}
    }

    goto EXIT;
    
  FAIL:
    retval = -1;

  EXIT:
    if (xpctxt)
	xmlXPathFreeContext(xpctxt);
    if (xpobj)
	xmlXPathFreeObject(xpobj);
    if (fullquery)
	free(fullquery);
    return retval;
}

#undef FUNC
#define FUNC "exclude_epochs"
/*
 * exclude_epochs --
 *
 *  For each query index with a non-NULL entry in queryepochexcludes,
 *  find the "exclusion" events in doc matching
 *    //*[local-name()='events']/*[local-name()='event'][EXCLUDE]
 *  and remove from eventlist the entries (for that query index) whose
 *  epoch [evonset-epochdurbefore, evonset+epochdurafter) overlaps an
 *  exclusion event's interval [onset, onset+dur).  Onsets left with no
 *  query entries are then erased from eventlist.
 *
 *  The excluded onsets are written to logfp when it is non-NULL; a
 *  per-query count is written to stderr when logfp is some stream
 *  other than stderr.
 *
 *  Returns 0 on success, -1 on error (eventlist may then be partially
 *  modified).
 */
int
exclude_epochs(xmlDocPtr doc, int numqueries, char ** queryepochexcludes, char ** querylabels, double epochdurbefore, double epochdurafter, std::map<double, qvec > & eventlist, FILE * logfp)
{
    int retval = 0;
    const char * queryprefix = "//*[local-name()='events']/*[local-name()='event']";
    char * fullquery = NULL;
    int indquery = 0;
    xmlXPathContextPtr xpctxt = NULL;
    xmlXPathObjectPtr xpobj = NULL;
    FILE * logerr = NULL;

    if (logfp != NULL && logfp != stderr)
	logerr = stderr;

    for (indquery = 0; indquery < numqueries; indquery++) {
	size_t prefixlen = 0;
	size_t predicatelen = 0;
	int nummatches = 0;
	int indmatch = 0;
	int numexclevents = 0;

	if (queryepochexcludes[indquery] == NULL)
	    continue;
	
	/* build "<prefix>[<exclusion predicate>]" */
	prefixlen = strlen(queryprefix);
	predicatelen = strlen(queryepochexcludes[indquery]);
	fullquery = (char *)malloc(sizeof(char)*(prefixlen + 1 + predicatelen + 1 + 1));
	if (fullquery == NULL) {
	    fprintf(stderr,"Error: unable to allocate memory for query\n");
	    goto FAIL;
	}
	strcpy(fullquery, queryprefix);
	strcpy(fullquery + prefixlen, "[");
	strcpy(fullquery + prefixlen + 1, queryepochexcludes[indquery]);
	strcpy(fullquery + prefixlen + 1 + predicatelen, "]");
	
	if ((xpctxt = xmlXPathNewContext(doc)) == NULL) {
	    fprintf(stderr,"Error: unable to create new XPath context\n");
	    goto FAIL;
	}

	if ((xpobj = xmlXPathEvalExpression((xmlChar *)fullquery, xpctxt)) == NULL) {
	    fprintf(stderr,"Error: unable to evaluate xpath expression '%s'.\n", fullquery);
	    goto FAIL;
	}

	nummatches = 0;
	if (xpobj->nodesetval) {
	    nummatches = xpobj->nodesetval->nodeNr;
	}

	if (logfp)
	    fprintf(logfp, "Epoch exclusion query for '%s' will exclude events at the following onsets:", querylabels[indquery]);

	for (indmatch = 0; indmatch < nummatches; indmatch++) {
	    double onset;
	    double dur;
	    if (get_onsetdur(xpobj->nodesetval->nodeTab[indmatch], &onset, &dur) != 0) {
		fprintf(stderr, "Error getting onset and duration!\n");
		goto FAIL;
	    }

	    /* find any epochs [evonset-epochdurbefore, evonset+epochdurafter)
	     * in eventlist that overlap with current exclusionary event
	     * [onset, onset+dur), i.e. such that:
	     *   evonset - epochdurbefore < onset + dur
	     *   onset < evonset + epochdurafter
	     * so:
	     *   evonset < onset + dur + epochdurbefore
	     *   evonset > onset - epochdurafter
	     */
	    {
		std::map<double, qvec>::iterator eiter;
		size_t qvind;
		eiter = eventlist.lower_bound(onset - epochdurafter);
		/* lower bound is exclusive: skip an exact match */
		while (eiter != eventlist.end() &&
		       (*eiter).first == onset - epochdurafter) {
		    eiter++;
		}
		while (eiter != eventlist.end() &&
		       (*eiter).first < onset + dur + epochdurbefore) {
		    qvec & qlist = (*eiter).second;
		    size_t qsize = qlist.size();
		    for (qvind = 0; qvind < qsize; qvind++) {
			if (qlist[qvind].qind == indquery) {
			    numexclevents++;
			    if (logfp)
				fprintf(logfp, " %g", (*eiter).first);
			    /* remove by overwriting with the last entry */
			    qlist[qvind] = qlist[qlist.size()-1];
			    qlist.resize(qlist.size()-1);
			    break;
			}
		    }
		    eiter++;
		}
	    }
	}
	if (logfp)
	    fprintf(logfp, "\n");
	if (logerr)
	    fprintf(logerr, "Epoch exclusion query for '%s' excluded %u events.\n", querylabels[indquery], (unsigned int)numexclevents);

	xmlXPathFreeObject(xpobj); xpobj = NULL;
	xmlXPathFreeContext(xpctxt); xpctxt = NULL;
	free(fullquery); fullquery = NULL;
    }

    /* remove events that no longer correspond to any queries */
    {
	std::map<double, qvec>::iterator eiter;
	std::map<double, qvec>::iterator eiter2;
	eiter = eventlist.begin();
	while (eiter != eventlist.end()) {
	    if ((*eiter).second.size() == 0) {
		eiter2 = eiter;
		eiter++;
		eventlist.erase(eiter2);
	    } else {
		eiter++;
	    }
	}
    }

    goto EXIT;
    
  FAIL:
    retval = -1;

  EXIT:
    if (xpctxt)
	xmlXPathFreeContext(xpctxt);
    if (xpobj)
	xmlXPathFreeObject(xpobj);
    if (fullquery)
	free(fullquery);
    return retval;
}

#undef FUNC
#define FUNC "concatstr"
/*
 * concatstr --
 *
 *  Join the text (the .second member) of the entries
 *  list[start] .. list[start+num-1] with single spaces.
 *
 *  The requested range is clipped to the bounds of the list, so a
 *  request that starts or ends outside the list returns only the
 *  entries that actually exist (possibly none) instead of reading
 *  outside the vector.  A zero or negative num gives an empty string.
 */
static
std::string
concatstr(const std::vector<std::pair<int,std::string> > & list, off_t start, off_t num)
{
    std::string retval("");
    const off_t listsize = (off_t)list.size();
    off_t first = start;
    off_t last = start + num; /* one past the final requested index */
    if (first < 0)
	first = 0;
    if (last > listsize)
	last = listsize;
    for (off_t ind = first; ind < last; ind++) {
	if (ind > first)
	    retval += " ";
	retval += list[ind].second;
    }
    return retval;
}

#undef FUNC
#define FUNC "NEXTTOKEN"
/*
 * NEXTTOKEN --
 *
 *  Step to the token that follows position tokennum in tokenlist.
 *
 *  On success tokennum is incremented, curtype and curtoken receive the
 *  type and text of the new current token, and 1 is returned.
 *  If no token follows, a message showing the whole token list is
 *  written to stderr, the output arguments are left untouched, and
 *  0 is returned.
 */
static
int
NEXTTOKEN(const std::vector<std::pair<int,std::string> > & tokenlist, off_t & tokennum, int & curtype, std::string & curtoken)
{
    const off_t nextind = tokennum + 1;
    const bool havenext = (nextind < (off_t) tokenlist.size());
    if (havenext) {
	tokennum = nextind;
	curtype = tokenlist[nextind].first;
	curtoken = tokenlist[nextind].second;
	return 1;
    }
    fprintf(stderr, FUNC ": expecting more after:\n %s\n",
	    concatstr(tokenlist, 0, tokenlist.size()).c_str());
    return 0;
}

#undef FUNC
#define FUNC "query2xpath"
/*
 * query2xpath_param --
 *
 *  Convert one parameter-name token of the event query language into
 *  the XPath fragment that selects it.  Names found in magicparams map
 *  to their stored replacement; any other name (with a leading '%'
 *  removed) becomes value[@name='NAME'].
 */
static
std::string
query2xpath_param(const std::map<std::string, std::string> & magicparams, std::string name)
{
    std::map<std::string, std::string>::const_iterator found = magicparams.find(name);
    if (found != magicparams.end()) {
	return found->second;
    }
    if (!name.empty() && name[0] == '%') {
	name.erase(0,1);
    }
    return std::string("value[@name='") + name + "']";
}

/*
 * query2xpath --
 *
 *  Convert an event query (the simplified query syntax whose grammar
 *  is given in the comments below) into an XPath predicate expression.
 *
 *  queryin is a NUL-terminated query string.  Leading, embedded and
 *  trailing whitespace between tokens is ignored.
 *
 *  Returns a newly allocated string (from strdup(); release it with
 *  free()) holding the XPath expression, or NULL if queryin is NULL,
 *  empty, or cannot be parsed.  Parse errors are reported on stderr.
 */
char *
query2xpath(const char * queryin)
{
    std::string queryout("");
    const char * curpos = NULL;
    typedef enum {
	T_INVALID,
	T_NUMTOKEN,
	T_STRTOKEN,
	T_PARAMTOKEN,
	T_OPENPAREN,
	T_CLOSEPAREN,
	T_COMMA,
	T_DASH,
	T_AND,
	T_OR,
	T_INEQ_OP,
	T_EQ_OP
    } tokentype;
    typedef enum {
	S_INVALID,
	S_QUERY,
	S_PQUERY,
	S_CONDITION
    } parsestate;
    typedef std::pair<int, std::string> tokenstruct;
    std::vector<tokenstruct> tokenlist;
    std::vector<unsigned int> stack;
    off_t tokennum = 0;
    size_t numtokens = 0;
    std::map<std::string, std::string> magicparams;

    if (queryin == NULL) {
	fprintf(stderr, FUNC ": queryin is null!\n");
	goto FAIL;
    }

    /* first convert into tokens */
    curpos = queryin;
    while (*curpos) {
	int toktype = -1;
	size_t tokenlen = 0;
	const char * tokenstart = curpos;
	/* ctype functions need an unsigned char value */
	while (*curpos && isspace((unsigned char)*curpos)) {
	    tokenstart++;
	    curpos++;
	}
	if (*curpos == '\0') {
	    /* only whitespace was left; it is not a token */
	    break;
	}
	if (isdigit((unsigned char)*curpos) || *curpos == '.') {
	    /* NUMTOKEN   ::=  DIGIT+ "." DIGIT*
	     *              |  DIGIT+
	     *              |  "." DIGIT+
	     */
	    while (isdigit((unsigned char)*curpos)) { tokenlen++; curpos++; }
	    if (*curpos == '.')                     { tokenlen++; curpos++; }
	    while (isdigit((unsigned char)*curpos)) { tokenlen++; curpos++; }
	    toktype = T_NUMTOKEN;
	} else if (*curpos == '"' || *curpos == '\'') {
	    /* STRTOKEN   ::=  "'" STRCHAR1+ "'"
	     *              |  '"' STRCHAR2+ '"'
	     * STRCHAR1   ::= any ASCII character except single quote (')
	     * STRCHAR2   ::= any ASCII character except double quote (")
	     */
	    char quotechar = *curpos;
	    tokenlen++; curpos++;
	    while (*curpos != '\0' && *curpos != quotechar) {
		tokenlen++;
		curpos++;
	    }
	    if (*curpos != quotechar) {
		fprintf(stderr, FUNC ": didn't find end of string character (%c):\n %s\n", quotechar, tokenstart);
		goto FAIL;
	    }
	    tokenlen++; /* include end-of-string character */
	    curpos++; /* get rid of end-of-string character */
	    toktype = T_STRTOKEN;
	} else if (*curpos == '$' || *curpos == '%' || *curpos == '_' ||
		   isalpha((unsigned char)*curpos)) {
	    /* PARAMTOKEN ::=  "$" PARAMSTART PARAMCHAR*
	     *              |  "%" PARAMSTART PARAMCHAR*
	     *              |      PARAMSTART PARAMCHAR*
	     * PARAMSTART ::=  "_" | LETTER
	     * PARAMCHAR  ::=  "." | "_" | LETTER | DIGIT
	     */
	    if (*curpos == '$' || *curpos == '%') {
		tokenlen++;
		curpos++;
	    }
	    if (*curpos != '_' && !isalpha((unsigned char)*curpos)) {
		fprintf(stderr, FUNC ": parameter name must start with underscore (_) or letter at:\n %s\n", curpos);
		goto FAIL;
	    }
	    tokenlen++;
	    curpos++;
	    while (*curpos == '.' || *curpos == '_' || isalnum((unsigned char)*curpos)) {
		tokenlen++;
		curpos++;
	    }
	    toktype = T_PARAMTOKEN;
	} else if (*curpos == '(') {
	    tokenlen++;
	    curpos++;
	    toktype = T_OPENPAREN;
	} else if (*curpos == ')') {
	    tokenlen++;
	    curpos++;
	    toktype = T_CLOSEPAREN;
	} else if (*curpos == ',') {
	    tokenlen++;
	    curpos++;
	    toktype = T_COMMA;
	} else if (*curpos == '-') {
	    tokenlen++;
	    curpos++;
	    toktype = T_DASH;
	} else if (*curpos == '&') {
	    tokenlen++;
	    curpos++;
	    toktype = T_AND;
	} else if (*curpos == '|') {
	    tokenlen++;
	    curpos++;
	    toktype = T_OR;
	} else if (*curpos == '>' || *curpos == '<') {
	    /* INEQ_OP  ::=  "<=" | ">=" | "<" | ">" */
	    tokenlen++;
	    curpos++;
	    if (*curpos == '=') {
		tokenlen++;
		curpos++;
	    }
	    toktype = T_INEQ_OP;
	} else if (*curpos == '!' || *curpos == '=') {
	    /* EQ_OP    ::=  "==" | "!=" */
	    tokenlen++;
	    curpos++;
	    if (*curpos != '=') {
		fprintf(stderr, FUNC ": bad operator at:\n %s\n", tokenstart);
		goto FAIL;
	    }
	    tokenlen++;
	    curpos++;
	    toktype = T_EQ_OP;
	} else {
	    fprintf(stderr, FUNC ": unrecognized syntax at:\n %s\n", tokenstart);
	    goto FAIL;
	}
	tokenlist.push_back(tokenstruct(toktype,std::string(tokenstart, tokenlen)));
    }

    if (tokenlist.size() == 0) {
	fprintf(stderr, FUNC ": queryin is empty!\n");
	goto FAIL;
    }

    /* names with a fixed meaning, with and without the '$' prefix */
    magicparams[std::string("$onset")] = std::string("onset");
    magicparams[std::string("onset")] = std::string("onset");
    magicparams[std::string("$duration")] = std::string("duration");
    magicparams[std::string("duration")] = std::string("duration");
    magicparams[std::string("$type")] = std::string("@type");
    magicparams[std::string("type")] = std::string("@type");
    magicparams[std::string("$units")] = std::string("units");
    magicparams[std::string("units")] = std::string("units");
    magicparams[std::string("$description")] = std::string("description");
    magicparams[std::string("description")] = std::string("description");

    tokennum = 0;
    numtokens = tokenlist.size();

    /* Starting point is thus:
     *   QUERY ::= "(" QUERY ")"
     *           | QUERY "&" QUERY
     *           | QUERY "|" QUERY
     *           | CONDITION
     * XPath has the same order of operations, so we don't need to
     * worry about re-ordering anything or adding parentheses.
     * For our purposes, then, the above is equivalent to:
     *   QUERY  ::= PQUERY    "&" QUERY
     *            | PQUERY    "|" QUERY
     *            | PQUERY
     *            | CONDITION "&" QUERY
     *            | CONDITION "|" QUERY
     *            | CONDITION
     *   PQUERY ::= "(" QUERY ")"
     * The state machine below uses "goto"s for clarity!
     */
    stack.push_back(S_QUERY);
    while (stack.size() > 0 && tokennum < (off_t)numtokens) {
	tokenstruct & tstruct = tokenlist[tokennum];
	int curtype = tstruct.first;
	std::string curtoken = tstruct.second;
	if (curtype == T_OPENPAREN) {
	    stack.push_back(S_PQUERY);
	    queryout += "(";
	    tokennum++;
	    continue;
	}
	/* we didn't find an open parenthesis, so parse a CONDITION */
	{
	    std::string lvalue("");
	    int ltype = -1;
	    std::string rvalue("");
	    if (curtype != T_PARAMTOKEN && curtype != T_NUMTOKEN && curtype != T_STRTOKEN) {
		fprintf(stderr, "param name, string, or number expected after:\n %s\nbut got:\n %s\n",
			concatstr(tokenlist, 0, tokennum).c_str(),
			concatstr(tokenlist, tokennum, numtokens-tokennum).c_str());
		goto FAIL;
	    }
	    ltype = curtype;
	    if (curtype == T_PARAMTOKEN) {
		lvalue = query2xpath_param(magicparams, curtoken);
	    } else {
		lvalue = curtoken;
	    }
	    if (tokennum + 1 < (off_t)numtokens) {
		NEXTTOKEN(tokenlist, tokennum, curtype, curtoken);
	    } else {
		/* the left-hand value was the last token */
		curtype = T_INVALID;
		tokennum++;
	    }
	    if (curtype == T_OPENPAREN) {
		/* PARAM "(" CLAUSE [ "," CLAUSE ]* ")" -- clauses are or-ed */
		int firstclause = 1;
		if (ltype != T_PARAMTOKEN) {
		    fprintf(stderr, "Expected param name before paren here:\n %s\n",
			    concatstr(tokenlist, 0, tokennum+1).c_str());
		    goto FAIL;
		}
		queryout += "(";
		if (!NEXTTOKEN(tokenlist, tokennum, curtype, curtoken))
		    goto FAIL;
		while (curtype != T_CLOSEPAREN) {
		    if (!firstclause) {
			if (curtype != T_COMMA) {
			    fprintf(stderr, "Expected comma or right-paren after:\n %s\nbut got:\n %s\n",
				    concatstr(tokenlist, 0, tokennum).c_str(),
				    concatstr(tokenlist, tokennum, numtokens-tokennum).c_str());
			    goto FAIL;
			}
			queryout += " or ";
			if (!NEXTTOKEN(tokenlist, tokennum, curtype, curtoken))
			    goto FAIL;
		    }
		    firstclause = 0;
		    if (curtype == T_INEQ_OP) {
			std::string op = curtoken;
			if (!NEXTTOKEN(tokenlist, tokennum, curtype, curtoken))
			    goto FAIL;
			if (curtype != T_NUMTOKEN) {
			    fprintf(stderr, "Expected number after:\n %s\nbut got:\n %s\n",
				    concatstr(tokenlist, 0, tokennum).c_str(),
				    concatstr(tokenlist, tokennum, numtokens-tokennum).c_str());
			    goto FAIL;
			}
			queryout += lvalue;
			queryout += op;
			queryout += curtoken;
		    } else if (curtype == T_NUMTOKEN) {
			std::string rangebegin = curtoken;
			if (tokennum + 1 < (off_t)numtokens &&
			    tokenlist[tokennum+1].first == T_DASH) {
			    /* inclusive range NUM "-" NUM */
			    tokennum++;
			    if (!NEXTTOKEN(tokenlist, tokennum, curtype, curtoken))
				goto FAIL;
			    if (curtype != T_NUMTOKEN) {
				fprintf(stderr, FUNC ": number expected after:\n %s\nbut got:\n %s\n",
					concatstr(tokenlist, 0, tokennum).c_str(),
					concatstr(tokenlist, tokennum, numtokens-tokennum).c_str());
				/* do not build a range out of a non-number */
				goto FAIL;
			    }
			    std::string rangeend = curtoken;
			    queryout += "(";
			    queryout += lvalue;
			    queryout += ">=";
			    queryout += rangebegin;
			    queryout += " and ";
			    queryout += lvalue;
			    queryout += "<=";
			    queryout += rangeend;
			    queryout += ")";
			} else {
			    queryout += lvalue;
			    queryout += "=";
			    queryout += curtoken;
			}
		    } else {
			/* should be a string */
			queryout += lvalue;
			queryout += "=";
			queryout += curtoken;
		    }
		    if (!NEXTTOKEN(tokenlist, tokennum, curtype, curtoken))
			goto FAIL;
		}
		tokennum++;
		queryout += ")";
	    } else if (curtype == T_INEQ_OP) {
		std::string op = curtoken;
		if (!NEXTTOKEN(tokenlist, tokennum, curtype, curtoken))
		    goto FAIL;
		if (curtype == T_PARAMTOKEN) {
		    rvalue = query2xpath_param(magicparams, curtoken);
		} else if (curtype == T_NUMTOKEN) {
		    rvalue = curtoken;
		} else {
		    fprintf(stderr, "Expected param name or number after:\n %s\nbut got:\n %s\n",
			    concatstr(tokenlist, 0, tokennum).c_str(),
			    concatstr(tokenlist, tokennum, numtokens-tokennum).c_str());
		    goto FAIL;
		}
		queryout += lvalue;
		queryout += op;
		queryout += rvalue;
		tokennum++;
	    } else if (curtype == T_EQ_OP) {
		std::string op = curtoken;
		if (op.compare("==") == 0) {
		    /* XPath spells equality with a single '=' */
		    op = "=";
		}
		if (!NEXTTOKEN(tokenlist, tokennum, curtype, curtoken))
		    goto FAIL;
		if (curtype == T_PARAMTOKEN) {
		    rvalue = query2xpath_param(magicparams, curtoken);
		} else if (curtype == T_NUMTOKEN || curtype == T_STRTOKEN) {
		    rvalue = curtoken;
		} else {
		    fprintf(stderr, "Expected param name, number, or string after:\n %s\nbut got:\n %s\n",
			    concatstr(tokenlist, 0, tokennum).c_str(),
			    concatstr(tokenlist, tokennum, numtokens-tokennum).c_str());
		    goto FAIL;
		}
		queryout += lvalue;
		queryout += op;
		queryout += rvalue;
		tokennum++;
	    } else {
		/* simple test */
		queryout += lvalue;
		queryout += "!=";
		queryout += "0";
	    }
	}

	int checkstateend = 1;
	while (checkstateend) {
	    /*
	     * pre-conditions:
	     *  for state S_PQUERY:
	     *    Next token is '&' or '|', which continues the query,
	     *    or next token is ')', which ends this parenthesized query (pop!).
	     *  for state S_QUERY:
	     *    Next token is '&' or '|', which continues the query,
	     *    or there is no following token, which ends the query (pop!).
	     */
	    int curstate = stack[stack.size()-1];
	    if (tokennum < (off_t)numtokens) {
		tokenstruct & tstruct = tokenlist[tokennum];
		curtype = tstruct.first;
		curtoken = tstruct.second;
	    }
	    if (tokennum >= (off_t)numtokens && curstate == S_PQUERY) {
		fprintf(stderr, FUNC ": end-of-query error; expected more after:\n %s\n",
			concatstr(tokenlist, 0, numtokens).c_str());
		goto FAIL;
	    }
	    if ((tokennum >= (off_t)numtokens && curstate == S_QUERY) ||
		(curtype == T_CLOSEPAREN && curstate == S_PQUERY)) {
		/* we are finished with a query, so pop the stack */
		if (curtype == T_CLOSEPAREN && curstate == S_PQUERY) {
		    /* push the close paren out */
		    queryout += ")";
		    tokennum++;
		}
		if (stack.size() == 0) {
		    fprintf(stderr, FUNC ": stack empty!\n");
		    goto FAIL;
		}
		stack.pop_back();
		if (stack.size() == 0) {
		    if (tokennum >= (off_t)numtokens) { /* we're done! */
			checkstateend = 0;
		    } else {
			fprintf(stderr, FUNC ": stack empty!\n");
			goto FAIL;
		    }
		}
	    } else {
		if (curtype == T_AND) {
		    queryout += " and ";
		    if (!NEXTTOKEN(tokenlist, tokennum, curtype, curtoken))
			goto FAIL;
		} else if (curtype == T_OR) {
		    queryout += " or ";
		    if (!NEXTTOKEN(tokenlist, tokennum, curtype, curtoken))
			goto FAIL;
		} else {
		    fprintf(stderr, FUNC ": garbage found after:\n %s\nhere:\n %s\n",
			    concatstr(tokenlist, 0, tokennum).c_str(),
			    concatstr(tokenlist, tokennum, numtokens-tokennum).c_str());
		    goto FAIL;
		}
		checkstateend = 0;
	    }
	}
    }

    /* The loop also stops when the tokens run out right after an open
     * parenthesis; the stack then still holds unfinished queries and
     * the output would be an unbalanced expression. */
    if (stack.size() > 0) {
	fprintf(stderr, FUNC ": end-of-query error; expected more after:\n %s\n",
		concatstr(tokenlist, 0, numtokens).c_str());
	goto FAIL;
    }

    goto EXIT;

  FAIL:
    return NULL;
    
  EXIT:
    return strdup(queryout.c_str());
}

/*
 * One entry of the function-call stack kept while an XPath expression
 * is expanded: the name of a call whose closing parenthesis has not
 * been seen yet, the parenthesis depth recorded for it, and the text of
 * the arguments collected for it so far.
 */
struct stackelem {
    std::string funcname;          /* name passed to the constructor */
    int parendepth;                /* depth passed to the constructor */
    std::vector<std::string> args; /* starts out empty */

    stackelem(std::string funcname_in, int parendepth_in)
	: funcname(funcname_in), parendepth(parendepth_in), args()
    {
    }
};

#undef FUNC
#define FUNC "expand_xpath_event"
/*
 * expand_xpath_event --
 *
 *  Expand the event-specific shorthand in an XPath expression and
 *  return plain XPath.  The input is split into tokens which are
 *  copied to the output unchanged except that:
 *   - the magic names onset, duration, type, units and description
 *     (with or without a leading '$') are replaced by onset, duration,
 *     @type, units and description;
 *   - any other name written as %NAME is output as NAME;
 *   - a call matchany(EXPR, VAL1, VAL2, ...) is rewritten as a
 *     contains() test of the delimited string value of EXPR against
 *     the delimited list of values.  Numeric literals among the values
 *     are written without leading or trailing zeros.
 *  Other function calls are re-emitted as NAME(ARG, ARG, ...).
 *
 *  Returns a newly allocated string (from strdup(); release it with
 *  free()), or NULL if queryin is NULL, empty, cannot be tokenized, or
 *  contains a function call that is malformed or never closed.  Errors
 *  are reported on stderr.
 */
char *
expand_xpath_event(const char * queryin)
{
    std::string newxpath("");
    /* longer names come before their own prefixes so that the longest
     * reserved name wins */
    const char * reservednames[] = {
	"comment",
	"text",
	"processing-instruction",
	"ancestor-or-self",
	"ancestor",
	"attribute",
	"child",
	"descendant-or-self",
	"descendant",
	"following-sibling",
	"following",
	"namespace",
	"parent",
	"preceding-sibling",
	"preceding",
	"self"
    };
    int numreserved = sizeof(reservednames) / sizeof(char *);
    const char * curpos = NULL;
    typedef enum {
	T_INVALID,
	T_WHITESPACE,
	T_QNAME,
	T_NONVAR,
	T_VAR
    } tokentype;
    typedef std::pair<int, std::string> tokenstruct;
    std::vector<tokenstruct> tokenlist;
    std::vector<stackelem> stack;
    std::string lastqname;
    int parendepth = 0;
    off_t tokennum = 0;
    size_t numtokens = 0;
    std::map<std::string, std::string> magicparams;

    typedef std::vector<std::string> strvec;

    if (queryin == NULL) {
	fprintf(stderr, FUNC ": queryin is null!\n");
	goto FAIL;
    }

    /* first convert into tokens */
    curpos = queryin;
    while (*curpos) {
	int toktype = -1;
	size_t tokenlen = 0;
	const char * tokenstart = curpos;
	std::string whitespace;
	/* ctype functions need an unsigned char value */
	while (*curpos && isspace((unsigned char)*curpos)) {
	    whitespace += *curpos;
	    tokenstart++;
	    curpos++;
	}
	if (whitespace.length() > 0) {
	    tokenlist.push_back(tokenstruct(T_WHITESPACE,whitespace));
	}
	if (*curpos == '\0') {
	    /* trailing whitespace: stop here, otherwise the strchr()
	     * test below would match the terminator and step past it */
	    break;
	}
	if (isdigit((unsigned char)*curpos) || *curpos == '.') {
	    /* NUMTOKEN   ::=  DIGIT+ "." DIGIT*
	     *              |  DIGIT+
	     *              |  "." DIGIT+
	     */
	    while (isdigit((unsigned char)*curpos)) { tokenlen++; curpos++; }
	    if (*curpos == '.')                     { tokenlen++; curpos++; }
	    while (isdigit((unsigned char)*curpos)) { tokenlen++; curpos++; }
	    toktype = T_NONVAR;
	} else if (*curpos == '"' || *curpos == '\'') {
	    /* STRTOKEN   ::=  "'" STRCHAR1+ "'"
	     *              |  '"' STRCHAR2+ '"'
	     * STRCHAR1   ::= any ASCII character except single quote (')
	     * STRCHAR2   ::= any ASCII character except double quote (")
	     */
	    char quotechar = *curpos;
	    tokenlen++; curpos++;
	    while (*curpos != '\0' && *curpos != quotechar) {
		tokenlen++;
		curpos++;
	    }
	    if (*curpos != quotechar) {
		fprintf(stderr, FUNC ": didn't find end of string character (%c):\n %s\n", quotechar, tokenstart);
		goto FAIL;
	    }
	    tokenlen++; /* include end-of-string character */
	    curpos++; /* get rid of end-of-string character */
	    toktype = T_NONVAR;
	} else if (*curpos == '$' || *curpos == '%' || *curpos == '_' ||
		   isalpha((unsigned char)*curpos)) {
	    /* PARAMTOKEN ::=  "$" PARAMSTART PARAMCHAR*
	     *              |  "%" PARAMSTART PARAMCHAR*
	     *              |      PARAMSTART PARAMCHAR*
	     * PARAMSTART ::=  "_" | LETTER
	     * PARAMCHAR  ::=  "." | "_" | LETTER | DIGIT
	     */
	    int foundreserved = 0;
	    int foundvar = 0;
	    int foundcolon = 0;
	    if (*curpos == '$' || *curpos == '%') {
		foundvar = 1;
		tokenlen++;
		curpos++;
	    }
	    if (*curpos != '_' && !isalpha((unsigned char)*curpos)) {
		fprintf(stderr, FUNC ": parameter name must start with underscore (_) or letter at:\n %s\n", curpos);
		goto FAIL;
	    }
	    tokenlen++;
	    curpos++;
	    while (*curpos == '.' || *curpos == '_' || *curpos == '-' || *curpos == ':' || isalnum((unsigned char)*curpos)) {
		/* dash is allowed in reserved names */
		if (*curpos == '-') {
		    /* may be a reserved name, or could be the
		     * ending of a non-reserved name, check below */
		    break;
		}
		if (*curpos == ':') {
		    if (foundcolon) {
			/* colon is allowed in qualified names, but only once */
			break;
		    }
		    if (*(curpos + 1) == '*') {
			/* qualified wildcard name test (i.e. NAME:*) */
			tokenlen++;
			curpos++;
			break;
		    }
		    foundcolon = 1;
		}
		tokenlen++;
		curpos++;
	    }
	    if (!foundvar) {
		for (int namenum = 0; namenum < numreserved; namenum++) {
		    const char * name = reservednames[namenum];
		    size_t namelen = strlen(name);
		    if (strncmp(tokenstart, name, namelen) != 0) {
			continue;
		    }
		    /* only a whole name counts; "selftype" is not "self" */
		    unsigned char after = (unsigned char)tokenstart[namelen];
		    if (isalnum(after) || after == '_' || after == '.') {
			continue;
		    }
		    tokenlen = namelen;
		    curpos = tokenstart + tokenlen;
		    foundreserved = 1;
		    break;
		}
	    }
	    if (foundreserved) {
		toktype = T_NONVAR;
	    } else if (foundvar) {
		toktype = T_VAR;
	    } else {
		toktype = T_QNAME;
	    }
	} else if (strchr("()[]@,/|+-=<>*", *curpos) != NULL) {
	    int c = *curpos;
	    tokenlen++;
	    curpos++;
	    if ((c == '/' && *curpos == '/') ||
		(c == '<' && *curpos == '=') ||
		(c == '>' && *curpos == '=')) {
		tokenlen++;
		curpos++;
	    }
	    toktype = T_NONVAR;
	} else if (*curpos == '!' && *(curpos + 1) == '=') {
	    tokenlen += 2;
	    curpos += 2;
	    toktype = T_NONVAR;
	} else {
	    fprintf(stderr, FUNC ": unrecognized syntax at:\n %s\n", tokenstart);
	    goto FAIL;
	}
	tokenlist.push_back(tokenstruct(toktype,std::string(tokenstart, tokenlen)));
    }

    if (tokenlist.size() == 0) {
	fprintf(stderr, FUNC ": queryin is empty!\n");
	goto FAIL;
    }

    /* names with a fixed meaning, with and without the '$' prefix */
    magicparams[std::string("$onset")] = std::string("onset");
    magicparams[std::string("onset")] = std::string("onset");
    magicparams[std::string("$duration")] = std::string("duration");
    magicparams[std::string("duration")] = std::string("duration");
    magicparams[std::string("$type")] = std::string("@type");
    magicparams[std::string("type")] = std::string("@type");
    magicparams[std::string("$units")] = std::string("units");
    magicparams[std::string("units")] = std::string("units");
    magicparams[std::string("$description")] = std::string("description");
    magicparams[std::string("description")] = std::string("description");

    numtokens = tokenlist.size();

    /* Every token is visited exactly once.  Text goes straight to
     * newxpath unless a function call is open, in which case it is
     * collected in that call's argument list until its ")" arrives. */
    for (tokennum = 0; tokennum < (off_t)numtokens; tokennum++) {
	tokenstruct & tstruct = tokenlist[tokennum];
	int curtype = tstruct.first;
	std::string curtoken = tstruct.second;
	int popfunc = 0;
	int gotcomma = 0;
	int gotopenparen = 0;
	std::string newtext;
	/* a name is a function name only if "(" follows it directly */
	std::string savedlastqname = lastqname;
	lastqname = "";
	if (curtype == T_VAR || curtype == T_QNAME) {
	    if (magicparams.find(curtoken) != magicparams.end()) {
		newtext += magicparams[curtoken];
	    } else if (curtoken[0] == '%') {
		newtext += curtoken.substr(1);
	    } else {
		if (curtype == T_QNAME) {
		    lastqname = curtoken;
		}
		newtext += curtoken;
	    }
	} else if (curtype == T_NONVAR) {
	    if (curtoken == "(") {
		gotopenparen = 1;
		parendepth++;
		if (savedlastqname.length() > 0) {
		    /* assume this was a "function" call.  remove function
		     * name already output to newxpath or stack; the whole
		     * call is written out again when it is closed
		     */
		    std::string * strref = &newxpath;
		    if (stack.size() > 0) {
			/* saved lastqname was saved to funcstack not newxpath */
			if (stack.back().args.size() == 0) {
			    fprintf(stderr, FUNC ": Internal error: %s | %s\n", savedlastqname.c_str(), "");
			    goto FAIL;
			}
			strref = &(stack.back().args.back());
		    }
		    size_t namelen = savedlastqname.length();
		    if (strref->length() < namelen ||
			strref->compare(strref->length() - namelen, namelen, savedlastqname) != 0) {
			fprintf(stderr, FUNC ": Internal error: %s | %s\n", savedlastqname.c_str(), strref->c_str());
			goto FAIL;
		    }
		    strref->erase(strref->length() - namelen);
		    stack.push_back(stackelem(savedlastqname, parendepth));
		}
	    } else if (curtoken == ",") {
		gotcomma = 1;
	    } else if (curtoken == ")") {
		if (stack.size() > 0 && stack.back().parendepth == parendepth) {
		    popfunc = 1;
		}
		parendepth--;
	    }
	    newtext += curtoken;
	} else if (curtype == T_WHITESPACE) {
	    newtext += curtoken;
	} else {
	    fprintf(stderr, FUNC ": Got invalid token type %d (token %s)\n", curtype, curtoken.c_str());
	    goto FAIL;
	}
	if (popfunc) {
	    /* see if this is a function we need to deal with */
	    stackelem funcref = stack.back();
	    stack.pop_back();
	    std::string funcname = funcref.funcname;
	    strvec & args = funcref.args;
	    if (funcname == "matchany") {
                /* Usage: matchany(EXPR, VAL1, VAL2, ...)
		 * Returns true if EXPR string-wise matches any one of the VAL
		 * arguments.
		 */
		if (args.size() < 2) {
		    fprintf(stderr, FUNC ": matchany needs an expression and at least one value\n");
		    goto FAIL;
		}

		/* protect each member of the haystack: lengthen each
		 * delimiter until no argument text contains it */
		std::string protect1("<");
		std::string protect2(">");
		std::string * protects[2] = { &protect1, &protect2 };
		for (int pnum = 0; pnum < 2; pnum++) {
		    std::string & protect = *protects[pnum];
		    bool tryagain = true;
		    while (tryagain) {
			tryagain = false;
			for (strvec::iterator argiter = args.begin();
			     argiter != args.end();
			     argiter++) {
			    if (argiter->find(protect) == std::string::npos) {
				continue;
			    }
			    /* found delimiter, need to change it and try again*/
			    protect.push_back(protect[protect.length() - 1]);
			    tryagain = true;
			    break;
			}
		    }
		}

		/* create the haystack; this only works as a literal if
		 * every value is a plain number or quoted string */
		std::string haystack;
		bool simple = true;
		for (strvec::iterator argiter = args.begin() + 1;
		     simple && argiter != args.end();
		     argiter++) {
		    const std::string & arg = *argiter;
		    /* ignore surrounding space */
		    size_t corebegin = 0;
		    size_t coreend = arg.length();
		    while (corebegin < coreend && isspace((unsigned char)arg[corebegin])) { corebegin++; }
		    while (coreend > corebegin && isspace((unsigned char)arg[coreend - 1])) { coreend--; }
		    const std::string core = arg.substr(corebegin, coreend - corebegin);
		    const size_t corelen = core.length();

		    /* check if a number (and normalize) */
		    std::string sign, integral, fraction;
		    size_t numdigits = 0;
		    size_t pos = 0;
		    if (pos < corelen && (core[pos] == '+' || core[pos] == '-')) { sign += core[pos++]; }
		    while (pos < corelen && isdigit((unsigned char)core[pos])) { integral += core[pos++]; numdigits++; }
		    if (pos < corelen && core[pos] == '.') {
			pos++;
			while (pos < corelen && isdigit((unsigned char)core[pos])) { fraction += core[pos++]; numdigits++; }
		    }
		    if (pos == corelen && numdigits > 0) {
			/* this is a number -- drop leading zeros (keeping
			 * a single "0") and trailing fraction zeros */
			size_t firstnonzero = integral.find_first_not_of('0');
			if (firstnonzero == std::string::npos) {
			    integral = "0";
			} else {
			    integral.erase(0, firstnonzero);
			}
			size_t lastnonzero = fraction.find_last_not_of('0');
			if (lastnonzero == std::string::npos) {
			    fraction = "";
			} else {
			    fraction.erase(lastnonzero + 1);
			}
			haystack += protect1 + sign + integral;
			if (fraction.length() > 0) {
			    // make sure point is not output unless necessary
			    haystack += "." + fraction;
			}
			haystack += protect2;
			continue;
		    }

		    if (corelen >= 2 &&
			(core[0] == '"' || core[0] == '\'') &&
			core[corelen - 1] == core[0]) {
			const std::string inner = core.substr(1, corelen - 2);
			/* the haystack is emitted inside double quotes,
			 * so the content must not contain one */
			if (inner.find(core[0]) == std::string::npos &&
			    inner.find('"') == std::string::npos) {
			    /* this is a string we can just add */
			    haystack += protect1 + inner + protect2;
			    continue;
			}
		    }
		    /* not a simple string -- need to do haystack the
		     * complicated way */
		    simple = false;
		}
		newtext = "";
		newtext += "contains(";
		if (simple) {
		    newtext += "\"";
		    newtext += haystack;
		    newtext += "\"";
		} else {
		    /* build the haystack at evaluation time */
		    newtext += "concat(";
		    for (strvec::iterator argiter = args.begin() + 1;
			 argiter != args.end();
			 argiter++) {
			newtext += '"';
			newtext += protect1;
			newtext += '"';
			newtext += ',';
			newtext += "string(";
			newtext += *argiter;
			newtext += ')';
			newtext += ',';
			newtext += '"';
			newtext += protect2;
			newtext += '"';
			newtext += ',';
		    }
		    newtext += "\"\"";
		    newtext += ')';
		}
		/* the needle: the delimited string value of EXPR */
		newtext += ',';
		newtext += "concat(";
		newtext += '"';
		newtext += protect1;
		newtext += '"';
		newtext += ',';
		newtext += "string(";
		newtext += args[0];
		newtext += ')';
		newtext += ',';
		newtext += '"';
		newtext += protect2;
		newtext += '"';
		newtext += ')';
		newtext += ')';
	    } else {
		newtext = "";
		newtext += funcname;
		newtext += '(';
		if (args.size() > 0) {
		    for (strvec::iterator argiter = args.begin();
			 argiter != args.end();
			 argiter++) {
			newtext += *argiter;
			newtext += ", ";
		    }
		    // get rid of trailing comma/whitespace
		    newtext.erase(newtext.end() - 2, newtext.end());
		}
		newtext += ')';
	    }
	}
	if (newtext.length() > 0) {
	    if (stack.size() > 0) {
		stackelem & funcref = stack.back();
		strvec & args = funcref.args;
		bool atcurdepth = (funcref.parendepth == parendepth);
		if (atcurdepth && gotcomma) {
		    if (args.size() > 0) {
			// there was actually an argument here, and we got
			// a comma, so create a new empty argument to hold
			// future text
			args.push_back("");
		    }
		    // otherwise, just keep it empty in case it is an empty
		    // argument list
		} else if (atcurdepth && gotopenparen) {
		    // just got the open paren, no need to encode anything
		} else {
		    if (args.size() == 0) {
			args.push_back(newtext);
		    } else {
			args.back() += newtext;
		    }
		}
		// whatever was in newtext is encoded now in the args and will
		// get output when the function call hits the closing paren
	    } else {
		newxpath += newtext;
	    }
	}
    }

    /* text collected for a call that was never closed would be lost */
    if (stack.size() > 0) {
	fprintf(stderr, FUNC ": missing close parenthesis for function %s\n", stack.back().funcname.c_str());
	goto FAIL;
    }

    goto EXIT;

  FAIL:
    return NULL;
    
  EXIT:
    return strdup(newxpath.c_str());
}

/*
 * $Log: In-line log eliminated on transition to SVN; use svn log instead. $
 * Revision 1.22  2008/05/22 13:47:51  gadde
 * Move include.
 *
 * Revision 1.21  2008/05/22 13:12:51  gadde
 * Include ctype.h for isspace, etc.
 *
 * Revision 1.20  2008/03/27 14:55:27  gadde
 * When canonicalizing the event list, warn if there is a duration less than zero.
 *
 * Revision 1.19  2007/12/05 18:27:06  gadde
 * Alter diagnostic output to make it more easy to grep.
 *
 * Revision 1.18  2006/06/23 21:23:09  gadde
 * Error logging updates.
 *
 * Revision 1.17  2006/06/01 17:41:11  gadde
 * Update logfp usage.
 *
 * Revision 1.16  2006/06/01 15:28:07  gadde
 * Allow logfp to be NULL as argument to various functions.
 *
 * Revision 1.15  2005/07/12 17:22:45  gadde
 * Events that pass the filterquery never got added to the new list.  Fixed.
 *
 * Revision 1.14  2005/04/27 14:31:22  gadde
 * Add comment to be clear what happens if NULL query is sent to filter_events.
 *
 * Revision 1.13  2005/04/25 15:36:45  gadde
 * Don't throw away new event list in filter_events.
 *
 * Revision 1.12  2005/04/22 20:34:02  gadde
 * Another fix for end-of-string.
 *
 * Revision 1.11  2005/04/22 20:31:33  gadde
 * Fix string parsing in event query syntax.
 *
 * Revision 1.10  2005/04/01 22:26:49  gadde
 * Updates to allow more than one event per timepoint.
 *
 * Revision 1.9  2005/03/25 22:37:31  gadde
 * Add updates to event query language, and streamline the state machine.
 *
 * Revision 1.8  2005/03/04 15:51:41  gadde
 * Revert support for '-'.
 *
 * Revision 1.7  2005/03/04 15:49:39  gadde
 * Add support for '-' in parameter names, but warn the user.
 *
 * Revision 1.6  2005/03/02 17:52:49  gadde
 * Fix last fix (!= vs. ==).
 *
 * Revision 1.5  2005/03/02 17:50:52  gadde
 * Put quotes around strings in converted queries.
 *
 * Revision 1.4  2005/03/01 22:03:23  gadde
 * Fix missing $ and % check.
 *
 * Revision 1.3  2005/03/01 21:16:26  gadde
 * Add '$' and '%' prefix support for magic values.
 *
 */
