/*
 * matrix.cc implements the matrix command and class
 * Written by Charles Peterson beginning on October 28, 1997
 * Copyright (c) 1997 Southwest Foundation for Biomedical Research
 */

#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>

#include "solar.h"
#include "tablefile.h"
#include "pipeback.h"

// Number of entries currently in Matrices (incremented by add(),
// decremented by remove()).
int Matrix::count = 0;
// Registry of matrices placed there by add(); zero-initialized here, and
// remove() zeroes the slot it vacates at the end.
Matrix* Matrix::Matrices[] = {0};
// IBDID -> PEDNO table, filled by load_pedigree() from pedindex.out.
// NOTE(review): the (1024,1024) arguments are presumably initial size and
// growth increment -- confirm against FD_Array.
FD_Array<int> Matrix::Pedno(1024,1024);
// PEDNO -> number of pedindex.out records read for that pedigree, filled by
// load_pedigree().
FD_Array<int> Matrix::Pedsize(1024,1024);
// PEDNO of the last record read by load_pedigree() (-1 if none were read).
int Matrix::last_pedno;
// True while Pedno/Pedsize reflect pedindex.out; cleared by
// Changing_Pedigree() and set again by a successful load_pedigree().
bool Matrix::Pedno_Current = false;
// IBDID of the last record read by load_pedigree().
int Matrix::Pedno_Highest_ID = 0;
//
// The most fun part is...getting and setting values.
//
// Since IBDID's start from 1, 1-based id's are always assumed.
//
// Matrix data is either stored in little matrices (one per pedigree)
// or one big matrix (for the entire set of pedigrees).  This is indicated
// by the boolean ids_within_peds
//
// Within the matrices, the values are compressed into the upper diagonal
// (?) half...the get_index() function takes care of that.  This is because
// all matrices are symmetric (?).
//
// This is all done because otherwise matrices can get so big they can
// consume all available memory.
//
// To avoid the boolean and conditional test, we could have a subclass
//    but then we would have method lookup, which would likely take more time
//
float Matrix::get (int id1, int id2)
{
    if (ids_within_peds)
    {
	int ped1 = Pedno[id1];
	int ped2 = Pedno[id2];
	if (ped1 != ped2) return 0.0;
	return pedmat[ped1].values[get_index(id1,id2,pedmat[ped1].start)];
    }
    return pedmat[0].values[get_index(id1,id2,1)];
}


void Matrix::set (int id1, int id2, float value)
{
    if (ids_within_peds)
    {
	int ped1 = Pedno[id1];
	int ped2 = Pedno[id2];
	if (ped1 == ped2) {
	    int index = get_index (id1,id2,pedmat[ped1].start);
	    pedmat[ped1].values[index] = value;
	}
    }
    else
    {
	pedmat[0].values[get_index (id1,id2,1)] = value;
    }
}


// Construct an empty, unloaded matrix called name.  The name is copied;
// no storage is allocated and the object is not added to Matrices.
Matrix::Matrix (const char *name)
{
// Owned strings
    _name = Strdup (name);
    filename = Strdup ("");

// No storage yet
    pedmat_count = 0;
    pedmat = 0;

// Not part of a pair
    first_matrix = 0;
    second_matrix = 0;

// Flags
    _d7 = false;
    _ibd = false;
    defaultable = false;

// min starts above max so the first value loaded can replace both
    max = 0.0;
    min = 1.0;
}

// Destroy this matrix: take it out of Matrices, unlink it from any pair it
// belongs to, destroy its second matrix (if any) and release its storage.
Matrix::~Matrix ()
{
    remove ();

// If this is the second matrix of a pair and is being deleted on its own
// (find() hands out second matrices too), the first matrix must not be left
// holding a pointer to freed memory.
    if (first_matrix && first_matrix->second_matrix == this)
    {
	first_matrix->second_matrix = 0;
    }
    first_matrix = 0;

// A first matrix owns its second matrix.  Unlink both directions before
// deleting so the second's destructor does not reach back into this object.
    if (second_matrix)
    {
	Matrix *owned = second_matrix;
	second_matrix = 0;
	owned->first_matrix = 0;
	delete owned;
    }

    free (_name);
    free (filename);
    delete [] pedmat;  // Initialized to 0 by constructor, allocated by new
}

// find this matrix and remove from Matrices array
// if not in Matrices array, nothing is done
void Matrix::remove ()
{
// Look for the slot holding this object
    int slot = 0;
    while (slot < count && Matrices[slot] != this)
    {
	slot++;
    }
    if (slot >= count) return;  // Never added, nothing to do

// Slide the following entries down one place to close the gap
    for (int next = slot + 1; next < count; next++)
    {
	Matrices[next-1] = Matrices[next];
    }

// Clear the slot vacated at the end and shrink the list
    Matrices[count-1] = 0;
    count--;
}

	    
// Delete every matrix in Matrices, last to first.  Each destructor calls
// remove(), which is what shrinks the array.
void Matrix::reset ()
{
    int i = count;
    while (--i >= 0)
    {
	delete Matrices[i];
    }
}

// Append this matrix to the end of Matrices.  No capacity check is made.
void Matrix::add ()
{
    Matrices[count] = this;
    count++;
}

// Look up a matrix by name (compared with case_ins), searching from the most
// recently added entry backwards.  Both the listed matrices and their second
// matrices are candidates.  Returns 0 if nothing matches.
Matrix *Matrix::find (const char *nam)
{
    for (int i = count; i-- > 0; )
    {
	Matrix *candidate = index (i);
	if (!candidate) continue;

	if (!StringCmp (nam, candidate->name(), case_ins))
	{
	    return candidate;
	}

	Matrix *partner = candidate->second_matrix;
	if (partner && !StringCmp (nam, partner->name(), case_ins))
	{
	    return partner;
	}
    }
    return 0;
}

// Write into buf the "matrix load" command that would re-create this matrix
// (and its second matrix, if any).  The caller supplies a buffer that is
// large enough; buf is returned.
char *Matrix::command (char *buf)
{
    if (!second_matrix)
    {
	sprintf (buf, "matrix load %s %s ", filename, name() );
	return buf;
    }

// handle 2 matrices
    sprintf (buf, "matrix load %s %s %s", filename, name(),
	     second_matrix->name());
    return buf;
}

char *Matrix::describe (char *buf)
{
    if (second_matrix)
    {
	sprintf (buf, 
"matrix file=%s size=%d\n\
       name=%s min=%10.8f max=%10.8f\n\
       name=%s min=%10.8f max=%10.8f",
		 filename, highest_id, name(), min, max,
                 second_matrix->name(), 
                 second_matrix->min, second_matrix->max);
    }
    else
    {
	sprintf (buf, 
"matrix file=%s size=%d\n\
                 name=%s min=%10.8f max=%10.8f",
		 filename, highest_id, name(), min, max);
    }
    return buf;
}

// Write the descriptions of all matrices into buf, each followed by a
// newline.  The caller supplies a buffer that is large enough; buf is
// returned.
char *Matrix::describe_all (char *buf)
{
    int end = 0;	// offset at which the next description goes
    int i = 0;
    Matrix *m;
    while ((m = Matrix::index(i)) != 0)
    {
	m->describe (&buf[end]);
	end = strlen (buf);
	buf[end] = '\n';
	end++;
	i++;
    }
    buf[end] = '\0';
    return buf;
}

// Write the "matrix load" commands of all matrices into buf, one per line,
// with no newline after the last.  The caller supplies a buffer that is
// large enough; buf is returned.
char *Matrix::commands (char *buf)
{
    int end = 0;	// offset at which the next command goes
    int i = 0;
    Matrix *m;
    while ((m = Matrix::index(i)) != 0)
    {
	m->command (&buf[end]);
	end = strlen (buf);
	buf[end] = '\n';
	end++;
	i++;
    }

// Drop the newline written after the last command, if any
    if (end > 0)
    {
	end--;
    }
    buf[end] = '\0';
    return buf;
}

// Set the interpreter result to the "matrix load" commands of all matrices,
// each wrapped in braces and followed by a newline.
// Returns TCL_OK, or TCL_ERROR if scratch memory cannot be allocated.
int Matrix::return_all (Tcl_Interp* interp)
{
    char* buf = Strdup ("");
    Matrix* m;

    for (int i = 0; (m = Matrix::index(i)) != 0; i++)
    {
// command() writes "matrix load <file> <name> [<name2>]" with no length
// limit, so size the scratch buffer from the actual strings rather than
// trusting a fixed-size stack array.  32 covers the fixed text and NUL.
	size_t needed = strlen (m->filename) + strlen (m->name()) + 32;
	if (m->second_matrix)
	{
	    needed += strlen (m->second_matrix->name());
	}
	char* tbuf = (char*) malloc (needed);
	if (!tbuf)
	{
	    free (buf);
	    RESULT_LIT ("Out of memory listing matrices");
	    return TCL_ERROR;
	}

	string__append (&buf,"{");
	m->command(tbuf);
	string__append (&buf,tbuf);
	string__append (&buf,"}\n");
	free (tbuf);
    }
    RESULT_BUF(buf);
    free (buf);
    return TCL_OK;
}



// Write the "matrix load" command of every matrix to file, one per line.
void Matrix::write_commands (FILE *file)
{
    Matrix *m;
    for (int i = 0; (m = Matrix::index(i)) != 0; i++)
    {
// command() has no length limit, so size the buffer from the strings it
// will print.  32 covers the fixed text and NUL.
	size_t needed = strlen (m->filename) + strlen (m->name()) + 32;
	if (m->second_matrix)
	{
	    needed += strlen (m->second_matrix->name());
	}
	char *buf = (char*) malloc (needed);
	if (!buf)
	{
	    fprintf (stderr, "Out of memory writing matrix commands\n");
	    return;
	}
	fprintf (file, "%s\n", m->command (buf));
	free (buf);
    }
}

// Note that the pedigree is changing: if the Pedno/Pedsize tables were
// current, renew them and mark them stale so load_pedigree() rebuilds them.
void Matrix::Changing_Pedigree ()
{
    if (!Pedno_Current)
    {
	return;  // Already stale
    }
    Pedno.renew();
    Pedsize.renew();
    Pedno_Current = false;
}

// Make sure the Pedno (IBDID -> PEDNO) and Pedsize (PEDNO -> record count)
// tables reflect pedindex.out.  Does nothing if they are already current.
// Also sets last_pedno and Pedno_Highest_ID from the last record read.
// Returns 0 on success, otherwise a static error message.
const char* Matrix::load_pedigree ()
{
    if (Pedno_Current)
    {
	return 0;
    }

    const char *errmsg = 0;
    TableFile *pedindex = TableFile::open ("pedindex.out", &errmsg);
    if (errmsg)
    {
	return "Pedigree must be loaded first";
    }
    pedindex->start_setup (&errmsg);
    pedindex->setup ("IBDID", &errmsg);
    pedindex->setup ("PEDNO", &errmsg);
    if (errmsg)
    {
	delete pedindex;  // don't leak the open table on failure
	return "Something wrong with pedindex file";
    }
    Pedno.renew();
    Pedsize.renew();
    char** data;
    last_pedno = -1;
    int pedsize = 0;
    int ibdid = 0;  // stays 0 if the file has no records
    int pedno;
    while (0 != (data = pedindex->get (&errmsg)))
    {
	ibdid = atoi (data[0]);
	pedno = atoi (data[1]);

// A change of PEDNO closes out the previous pedigree's count
	if (last_pedno != -1 && last_pedno != pedno)
	{
	    Pedsize.set (last_pedno, pedsize);
	    pedsize = 0;
	}
	Pedno.set (ibdid, pedno);
	last_pedno = pedno;
	pedsize++;
    }
    Pedno_Highest_ID = ibdid;
    if (last_pedno != -1)
    {
	Pedsize.set (last_pedno, pedsize);
    }

// get() ending with anything other than "EOF" is a read error
    if (errmsg && Strcmp (errmsg, "EOF")) {
	Pedno.renew();
	Pedsize.renew();
	fprintf (stderr, "Errmsg: %s\n", errmsg);
	delete pedindex;  // don't leak the open table on failure
	return "Error reading pedigree for matrix";
    }
    delete pedindex;
    Pedno_Current = true;
    return 0;
}

// May be new "load," or a "re-load" of same filename

// Load (or re-load) this matrix, and its second matrix if it has one, from
// a gzipped matrix file.
//
//   specified_filename   file to load (".gz" is supplied through
//                        append_extension), or 0 to re-load the file this
//                        matrix was last loaded from
//
// The file is decompressed through "gunzip -c" and read twice: the first
// pass validates the IDs against the pedigree, decides whether per-pedigree
// storage can be used, and checks the optional checksum record; the second
// pass stores the values.
//
// Returns 0 on success, otherwise a static error message.  The matrix is
// taken out of the Matrices array on entry and only put back on success.
const char* Matrix::load (const char *specified_filename)
{
// Remove this matrix from matrix array until done
    remove ();

// working variables and names
    int scount, id1, id2;
    Matrix *m1 = this;
    Matrix *m2 = m1->second_matrix;

// Update Pedno and Pedsize tables if necessary
    const char* errmsg;
    if (   (errmsg = load_pedigree()) )
    {
	return errmsg;
    }

// Clear out old matrix storage
    if (pedmat_count) {
	delete [] pedmat;
	pedmat = 0;
	pedmat_count = 0;
	if (m2) {
	    delete [] m2->pedmat;
	    m2->pedmat = 0;
	    m2->pedmat_count = 0;
	}
    }

// Scan IBD file to see if there are ID's not in same pedigree
// and find highest ID
    char *loading_filename;
    if (specified_filename)
    {
	loading_filename = append_extension (specified_filename, ".gz");
    }
    else
    {
	loading_filename = Strdup (filename);
    }

// loading_filename is heap-allocated and only becomes this->filename on
// success.  This guard frees it on every error return below; ownership is
// released just before the successful return.
    struct OwnedName
    {
	char *ptr;
	~OwnedName () { if (ptr) free (ptr); }
    };
    OwnedName name_guard;
    name_guard.ptr = loading_filename;

    FILE *mfile = fopen (loading_filename, "r");
    if (!mfile)
    {
	return "Unable to open matrix file";
    }
    if (EOF == fgetc (mfile))
    {
	Fclose (mfile);
	return "Matrix file is empty";
    }
    Fclose (mfile);
    const char *pbarg[4];
    pbarg[0] = "gunzip";
    pbarg[1] = "-c";
    pbarg[2] = loading_filename;
    pbarg[3] = 0;
    mfile = pipeback_shell_open ("gunzip", pbarg);
    if (!mfile)
    {
	return "Unable to uncompress file";
    }

    char buf[256];
    if (!fgets (buf, 256, mfile))
    {
	pipeback_shell_close (mfile);
	return 	"Matrix load failed for lack of memory";
    }
    if (buf[0] != ' ' && !isdigit (buf[0]))  // Must be an error message
    {
	pipeback_shell_close (mfile);
	return 	"Unable to decompress matrix file";
    }

// Divide line(s) into two parts based on decimal point
//   First part is ID1 ID2 [Space] (both integers)
//   Second part is VAL1 [VAL2] (both floats)
//   Second part begins 1 or 2 characters to the left of decimal
//   If 2nd character left is digit, second part begins 1 character left
//   otherwise 2 characters left

//
// Note: The "load matrix" code in matrix.cc permits any fixed position
// for the beginning of data values.  However, because of the way the checksum
// is currently written by matcrc, starting in position 14, that requires
// the rest of the matrix to follow that precedent, with possible deviation
// of one character position (data values could start in column 13, though
// that is not recommended).  In future it may be required to make matcrc
// actually look at the rest of the file to allow for IBDID's higher than
// 99999, which would require making the starting data position higher.
//

    char *decimal_ptr = strchr (buf, '.');
    if (!decimal_ptr || decimal_ptr - buf < 4)
    {
	pipeback_shell_close (mfile);
	return "Invalid matrix file format";
    }
    int dpos =  decimal_ptr - buf;

    int first_len = dpos - 2;
    if (isdigit (buf[first_len])) first_len++;

// Now check ID's for this record and rest of file

    ids_within_peds = true;
    int record_number = 1;
    int first_id1 = 0;
    int first_id2 = 0;
    char savebuf[256];

    for (;;) // This is a "repeat until"
    {
	scount = sscanf (buf, "%d %d", &id1, &id2);
	if (scount != 2)
	{
	    pipeback_shell_close (mfile);
	    return "Invalid matrix file format: missing ID";
	}
	try
	{
	    if (Pedno[id1] != Pedno[id2])
	    {
		ids_within_peds = false;
	    }
	}
	catch (Out_of_Bounds)
	{
	    pipeback_shell_close (mfile);
	    return "ID's in matrix not found in pedigree";
	}
	highest_id = id1;
//
// Test checksum if present
//   The first record is a checksum record if the second record repeats
//   the same pair of IDs
//
	if (record_number == 1)
	{
	    first_id1 = id1;
	    first_id2 = id2;
	    strncpy (savebuf, buf, 256);
	}
	else if (record_number == 2)
	{
	    if (first_id1 == id1 && first_id2 == id2)
	    {
//
// Get checksum from matrix
//   sscanf returns EOF (non-zero) on empty input, so require exactly 1
//
		bool got_matrix_cksum = false;
		unsigned matrix_cksum = 0;
		char* cksum_pointer;
		if ((cksum_pointer = strchr (savebuf, '.')))
		{
		    cksum_pointer++;
		    if (1 == sscanf (cksum_pointer, "%u", &matrix_cksum))
		    {
			got_matrix_cksum = true;
		    }
		}
		if (!got_matrix_cksum)
		{
		    pipeback_shell_close (mfile);
		    return "Invalid matrix checksum (cksum) in line 1";
		}
//
// Get checksum from pedindex.out
//
		FILE* pfile = fopen ("pedindex.out", "r");
		if (!pfile)
		{
		    pipeback_shell_close (mfile);
		    return "Can't find pedindex.out";
		}
		fclose (pfile);

		unsigned pedindex_cksum;
		const char* carg[3];
		carg[0] = "cksum";
		carg[1] = "pedindex.out";
		carg[2] = 0;
		FILE* cfile = pipeback_shell_open ("cksum", carg);
		if (!cfile)
		{
		    pipeback_shell_close (mfile);
		    return "Unable to run cksum";
		}
		if (!fgets (buf, 256, cfile))
		{
		    pipeback_shell_close (mfile);
		    pipeback_shell_close (cfile);
		    return "Error reading checksum of pedindex.out";
		}
		if (1 != (sscanf (buf, "%u", &pedindex_cksum)))
		{
		    pipeback_shell_close (mfile);
		    pipeback_shell_close (cfile);
		    return "Error scanning checksum of pedindex.out";
		}
		pipeback_shell_close (cfile);
		if (matrix_cksum != pedindex_cksum)
		{
		    pipeback_shell_close (mfile);
		    return "Checksum (cksum) in matrix doesn't match pedindex.out";
		}
	    }
	}
//
// Get next record, break on end
//
	if (! fgets (buf, 256, mfile)) break;
	record_number++;
    }
    pipeback_shell_close (mfile);
    if (id1 != id2) {
	return "Invalid matrix file format: last not diagonal";
    }
    if (m2) {
	m2->ids_within_peds = ids_within_peds;
	m2->highest_id = highest_id;
    }
//
// Allocate a single large matrix if id's are not all in peds
//   We allocate a "half matrix" since it's symmetric
//  
    if (!ids_within_peds) {
	int half_size = 1 + get_index (Pedno_Highest_ID, Pedno_Highest_ID, 1);
	pedmat_count = 1;
	pedmat = new PedMatrix[pedmat_count];
	pedmat->values = new float [half_size];
	memset ((void*) pedmat->values, 0, sizeof(float)*half_size);
	if (m2) {
	    m2->pedmat_count = pedmat_count;
	    m2->pedmat = new PedMatrix[pedmat_count];
	    m2->pedmat->values = new float[half_size];
	    memset ((void*) m2->pedmat->values, 0, sizeof(float)*half_size);
	}

// Initialize diagonal elements to -1
	for (int i=1; i <= Pedno_Highest_ID; i++)
	{
	    m1->set (i,i,-1.0);
	    if (m2) m2->set (i,i,-1.0);
	}
//
// Allocate small "half-matrices" for each pedigree
//
    }
    else
    {
	pedmat_count = last_pedno;
	pedmat = new PedMatrix [ pedmat_count + 1 ];  // use 1-based index
	if (m2) {
	    m2->pedmat_count = pedmat_count;
	    m2->pedmat = new PedMatrix[ pedmat_count + 1 ];
	}
	int first_id = 1;
	for (int i=1; i <= last_pedno; i++)
	{
	    int size = Pedsize[i];
	    int half_size = 1 + get_index (size, size, 1);
	    pedmat[i].values = new float [half_size];
	    memset ((void*) pedmat[i].values, 0, sizeof(float)*half_size);
	    pedmat[i].start = first_id;
	    for (int j = first_id; j < first_id+size; j++)
	    {
		set (j, j, -1.0);
	    }
	    if (m2) {
		m2->pedmat[i].values = new float[half_size];
		memset ((void*) m2->pedmat[i].values, 0, 
			sizeof(float)*half_size);
		m2->pedmat[i].start = first_id;
		for (int j = first_id; j < first_id+size; j++)
		{
		    m2->set (j, j, -1.0);
		}
	    }
	    first_id += size;
	}
    }

// Read in file, this time saving matrix values

    mfile = pipeback_shell_open ("gunzip", pbarg);
    if (!mfile)
    {
	return "Unable to uncompress file 2nd time";
    }

    while (fgets (buf, 256, mfile))
    {
	float val1, val2;

// Temporarily terminate the line after the ID columns so the IDs are
// scanned from the first part only
	char savech = buf[first_len];
	buf[first_len] = '\0';
	scount = sscanf (buf, "%d %d", &id1, &id2);
	if (scount != 2)
	{
	    pipeback_shell_close (mfile);
	    return "Error reading matrix file record";
	}
	buf[first_len] = savech;
	if (!m2)
	{
	    scount = sscanf (&buf[first_len], "%f", &val1);
	    if (scount != 1)
	    {
		pipeback_shell_close (mfile);
		return "Error reading single matrix value";
	    }
	    m1->set (id1, id2, val1);
	    if (m1->max < val1) m1->max = val1;
	    if (m1->min > val1 && val1 > 0.0) m1->min = val1;

// -1 on the diagonal of an ibd/mibd matrix: mark the whole row with -1
	    if (m1->_ibd && id1 == id2 && val1 == -1.0)
	    {
		for (int rc = 1; rc <= m1->Pedno_Highest_ID; rc++)
		{
		    m1->set (id1, rc, -1.0);
		}
	    }
	}
	else
	{
	    scount = sscanf (&buf[first_len], "%f %f", &val1, &val2);
	    if (scount != 2)
	    {
		pipeback_shell_close (mfile);
		return "Error reading double matrix values";
	    }
	    m1->set (id1, id2, val1);
	    m2->set (id1, id2, val2);
	    if (m1->max < val1) m1->max = val1;
	    if (m1->min > val1) m1->min = val1;
	    if (m2->max < val2) m2->max = val2;
	    if (m2->min > val2) m2->min = val2;
	    if (m1->_ibd && id1 == id2 && val1 == -1.0)
	    {
		for (int rc = 1; rc <= m1->Pedno_Highest_ID; rc++)
		{
		    m1->set (id1, rc, -1.0);
		}
	    }
	    if (m2->_d7 && id1 == id2 && val2 == -1.0)
	    {
		for (int rc = 1; rc <= m2->Pedno_Highest_ID; rc++)
		{
		    m2->set (id1, rc, -1.0);
		}
	    }
	}
    }		
    pipeback_shell_close (mfile);

// Success: the matrix takes ownership of the file name
    free (filename);
    filename = loading_filename;
    name_guard.ptr = 0;
    add ();
    return 0;
}


// If named matrix already exists, setup reloads it
// otherwise, it creates new matrix

// Create (or prepare for re-load) the matrix called name1, optionally paired
// with a second matrix called name2, then load it from filename.
//
//   filename   matrix file passed on to load()
//   name1      name of the primary matrix
//   name2      name of the second matrix, or 0 if there is none
//
// Returns 0 on success.  On failure the matrix is deleted and the error
// message from load() is returned.
const char* Matrix::setup (const char *filename, const char *name1, 
		     const char *name2)
{
    Matrix* oldm = Matrix::find (name1);
    Matrix* m1;
    if (oldm)
    {
// Matrix with same name already exists.  Set up for re-load.
	m1 = oldm;
	if (name2) {
// Setup second matrix
	    if (m1->second_matrix) {
		free (m1->second_matrix->_name);
		m1->second_matrix->_name = Strdup (name2);
	    } else {
		m1->second_matrix = new Matrix (name2);
		if (m1->_ibd) m1->second_matrix->_d7 = true;
	    }
	} else {
// Delete previous second matrix, if any
	    if (m1->second_matrix) {
		delete m1->second_matrix;
		m1->second_matrix = 0;
	    }
	}
    }
    else
    {
// New primary matrix required
	m1 = new Matrix (name1);

// See if this is a twopoint (ibd,d7) or multipoint (mibd,d7) matrix
// Such matrices follow the -1 convention in which a -1 on the diagonal
// causes a (sub-)matrix to default to phi2/delta7

        char name3[128];
	strncpy (name3, name1, 3);
	name3[3] = '\0';

	char name4[128];
	strncpy (name4, name1, 4);
	name4[4] = '\0';
	
//	fprintf (stderr, "name3 is >%s< and name4 is >%s<\n", name3, name4);

	if (!Strcmp(name3, "ibd") || !Strcmp(name4, "mibd"))
	{
	    m1->_ibd = true;
	}
// Setup second matrix if required
	if (name2)
	{
	    Matrix *oldm2 = Matrix::find (name2);
	    if (oldm2)
	    {

// Previous matrix can be deleted if it is the second matrix of another
//   first matrix, or a first matrix having no second.  If it is the first
//   matrix of a matrix pair, it gets renamed.  If it is the second
//   matrix of another first matrix, be sure to zero that pointer.

		if (oldm2->first_matrix)
		{
//		    fprintf (stderr, "Setting backpointer to 0\n");
		    oldm2->first_matrix->second_matrix = 0;
		    delete oldm2;
		}
		else if (!oldm2->second_matrix)
		{
//		    fprintf (stderr, "Simply deleting it\n");
		    delete oldm2;
		}
		else
		{
//		    fprintf (stderr, "Renaming it\n");
		    char buf[1024];
		    strcpy (buf, "old_");
// strncat's count is the most it may append (plus a NUL), not the size of
// the destination, so leave room for what is already in buf
		    strncat (buf, oldm2->_name, sizeof(buf) - strlen(buf) - 1);
		    free (oldm2->_name);
		    oldm2->_name = Strdup (buf);
		}
	    }
	    m1->second_matrix = new Matrix (name2);
	    if (m1->_ibd) m1->second_matrix->_d7 = true;
	}
    }
    if (name2)
    {
	m1->second_matrix->first_matrix = m1;
    }
    const char *message = m1->load (filename);
    if (message)
    {
	delete m1;
    }
    return message;
}


// Tcl entry point for the "matrix" command.  Forms handled:
//
//   matrix                          result is the load commands of all matrices
//   matrix help                     evaluates "help matrix"
//   matrix -return                  result is the load commands, each in braces
//   matrix debug                    prints descriptions of all matrices
//   matrix delete <name>            deletes the named matrix
//   matrix delete_all               deletes all matrices
//   matrix load <file> <name1> [<name2>]
//
// Returns TCL_OK or TCL_ERROR (with an error message as the result).
extern "C" int MatrixCmd (ClientData clientData, Tcl_Interp *interp,
		  int argc, char *argv[])
{
    if (argc == 2 && !StringCmp ("help", argv[1], case_ins))
    {
	return Solar_Eval (interp, "help matrix");
    }

    if (argc == 1)
    {
	char buf[10000];
// commands() fills buf and returns it; copying it onto itself with
// sprintf would be a copy between overlapping objects
	Matrix::commands (buf);
	RESULT_BUF (buf);
	return TCL_OK;
    }

    if (argc == 2 && !StringCmp ("-return", argv[1], case_ins))
    {
	return  Matrix::return_all (interp);
    }

    if (argc == 2 && !StringCmp ("debug", argv[1], case_ins))
    {
	char buf[10000];
	printf ("%s", Matrix::describe_all (buf));
	return TCL_OK;
    }

    if (argc == 3 && !StringCmp (argv[1], "delete", case_ins))
    {
	Matrix *m = Matrix::find (argv[2]);
	if (!m)
	{
	    RESULT_LIT ("No such matrix");
	    return TCL_ERROR;
	}
	delete m;
	return TCL_OK;
    }

    if (argc == 2 && !StringCmp (argv[1], "delete_all", case_ins))
    {
	Matrix::reset();
	return TCL_OK;
    }

    if ((argc == 4 || argc == 5) && !StringCmp (argv[1], "load", case_ins))
    {
    // Setup new Matrices

	const char *message = 0;
	if (argc == 4)
	{
	    message = Matrix::setup (argv[2], argv[3]);
	}
	else
	{
	    message = Matrix::setup (argv[2], argv[3], argv[4]);
	}
	if (message)
	{
// The file name comes from the user; the precisions keep the message
// within buf however long it is
	    char buf[1024];
	    sprintf (buf, "%.200s:  %.800s", message, argv[2]);
	    RESULT_BUF (buf);
	    return TCL_ERROR;
	}
	return TCL_OK;
    }	
    RESULT_LIT ("Invalid matrix command");
    return TCL_ERROR;
}

// Re-load every matrix if the pedigree has changed since they were loaded.
// Always returns TCL_OK; a matrix that fails to re-load is reported on
// stderr (load() leaves such a matrix out of the Matrices array).
int Matrix::bind (Tcl_Interp *interp)
{
// If pedigree changed, must reload all matrices.
// This is bad, but should be avoided by not re-loading same pedigree

    if (Pedno_Current || count <= 0)
    {
	return TCL_OK;
    }

// load() takes the matrix out of Matrices and, on success, appends it at
// the end.  Walking the array by index while that happens would skip some
// matrices and re-load others twice.  Instead, always take the matrix at
// the front: over as many steps as there were matrices to begin with, each
// original matrix comes to the front exactly once, and the successfully
// re-loaded ones end up back in their original order.
    const int pending = count;
    for (int n = 0; n < pending && count > 0; n++)
    {
	if (Verbosity::max())
	{
	    fprintf (stderr, "Pedigree changed; reloading matrix %d\n", n);
	}
	Matrix *m = Matrices[0];
	const char *message = m->load();
	if (message)
	{
	    fprintf (stderr, "Error reloading matrix %s: %s\n",
		     m->name(), message);
	}
    }
    return TCL_OK;
}

