#ifndef FUSION_IO_H
#define FUSION_IO_H
// NIfTI datatype codes that correspond to the two typedefs below: in this
// header DT_FLOAT32 buffers are accessed as float and DT_UINT16 buffers as
// unsigned short.  The DT_* constants themselves are defined elsewhere.
// NOTE(review): presumably these are the datatypes used for images that are
// written out -- confirm against the callers.
#define OUT_INTENSITY_TYPE DT_FLOAT32
#define OUT_LABEL_TYPE DT_UINT16
// In-memory element type for intensity volumes.
typedef float intensity_t;
// In-memory element type for label volumes.
typedef unsigned short label_t;

// Helper for get_nim_3D: copy a flat voxel buffer whose elements have type
// S into vol[x][y][z], casting every element to T.  The buffer is walked
// with x varying fastest, then y, then z.  Exits with status 1 when there
// are voxels to copy but no buffer to read them from.
template <class S, class T> void nim_buffer_to_3D(const void * buffer,
                                                  T *** vol,
                                                  const size_t * dims)
{
    if (buffer == NULL && dims[0] * dims[1] * dims[2] > 0) {
        fprintf(stderr, "Error: image contains no voxel data\n");
        exit(1);
    }

    const S * src = (const S *)buffer;
    size_t count = 0;

    for (size_t z = 0; z < dims[2]; z++)
        for (size_t y = 0; y < dims[1]; y++)
            for (size_t x = 0; x < dims[0]; x++)
                vol[x][y][z] = (T)src[count++];
}

// Copy the voxel data of a NIfTI image into a pre-allocated 3D array.
//
// Parameters:
//   nim  - image to read; nim->datatype selects how nim->data is
//          interpreted
//   vol  - destination, indexed vol[x][y][z]; must already hold
//          dims[0] x dims[1] x dims[2] elements
//   dims - the three extents (x, y, z) to copy
//
// Every voxel is cast to T.  Prints a message and exits with status 1 if
// the datatype is not one of the supported DT_* codes, or if the image has
// no data buffer.
template <class T> void get_nim_3D(nifti_image * nim,
                                   T *** vol,
                                   const size_t * dims)
{
    switch (nim->datatype) {
    case DT_BINARY:
        // NOTE(review): the NIfTI-1 header describes DT_BINARY as 1 bit per
        // voxel, while this reads one bool per voxel (unchanged from the
        // previous implementation) -- confirm such inputs really occur.
        nim_buffer_to_3D<bool>(nim->data, vol, dims);
        break;
    case DT_UINT8:
        nim_buffer_to_3D<unsigned char>(nim->data, vol, dims);
        break;
    case DT_INT16:
        nim_buffer_to_3D<short>(nim->data, vol, dims);
        break;
    case DT_INT32:
        nim_buffer_to_3D<int>(nim->data, vol, dims);
        break;
    case DT_FLOAT32:
        nim_buffer_to_3D<float>(nim->data, vol, dims);
        break;
    case DT_FLOAT64:
        nim_buffer_to_3D<double>(nim->data, vol, dims);
        break;
    case DT_INT8:
        nim_buffer_to_3D<char>(nim->data, vol, dims);
        break;
    case DT_UINT16:
        nim_buffer_to_3D<unsigned short>(nim->data, vol, dims);
        break;
    case DT_UINT32:
        nim_buffer_to_3D<unsigned int>(nim->data, vol, dims);
        break;
    default:
        fprintf(stderr, "Datatype: %hd not supported\n",
                        nim->datatype);
        exit(1);
    }
}

// Helper for get_nim_4D: copy a flat voxel buffer whose elements have type
// S into vol[x][y][z][j], casting every element to T.  The buffer is
// walked with x varying fastest, then y, then z, then the fourth index j.
// Exits with status 1 when there are voxels to copy but no buffer to read
// them from.
template <class S, class T> void nim_buffer_to_4D(const void * buffer,
                                                  T **** vol,
                                                  const size_t * dims,
                                                  const size_t dim4)
{
    if (buffer == NULL && dims[0] * dims[1] * dims[2] * dim4 > 0) {
        fprintf(stderr, "Error: image contains no voxel data\n");
        exit(1);
    }

    const S * src = (const S *)buffer;
    size_t count = 0;

    for (size_t j = 0; j < dim4; j++)
        for (size_t z = 0; z < dims[2]; z++)
            for (size_t y = 0; y < dims[1]; y++)
                for (size_t x = 0; x < dims[0]; x++)
                    vol[x][y][z][j] = (T)src[count++];
}

// Copy the voxel data of a 4D NIfTI image into a pre-allocated 4D array.
//
// Parameters:
//   nim  - image to read; nim->datatype selects how nim->data is
//          interpreted
//   vol  - destination, indexed vol[x][y][z][j]; must already hold
//          dims[0] x dims[1] x dims[2] x dim4 elements
//   dims - the three spatial extents (x, y, z)
//   dim4 - extent of the fourth index j
//
// Every voxel is cast to T.  Prints a message and exits with status 1 if
// the datatype is not one of the supported DT_* codes, or if the image has
// no data buffer.
template <class T> void get_nim_4D(nifti_image * nim,
                                   T **** vol,
                                   const size_t * dims,
                                   const size_t dim4)
{
    switch (nim->datatype) {
    case DT_BINARY:
        // NOTE(review): the NIfTI-1 header describes DT_BINARY as 1 bit per
        // voxel, while this reads one bool per voxel (unchanged from the
        // previous implementation) -- confirm such inputs really occur.
        nim_buffer_to_4D<bool>(nim->data, vol, dims, dim4);
        break;
    case DT_UINT8:
        nim_buffer_to_4D<unsigned char>(nim->data, vol, dims, dim4);
        break;
    case DT_INT16:
        nim_buffer_to_4D<short>(nim->data, vol, dims, dim4);
        break;
    case DT_INT32:
        nim_buffer_to_4D<int>(nim->data, vol, dims, dim4);
        break;
    case DT_FLOAT32:
        nim_buffer_to_4D<float>(nim->data, vol, dims, dim4);
        break;
    case DT_FLOAT64:
        nim_buffer_to_4D<double>(nim->data, vol, dims, dim4);
        break;
    case DT_INT8:
        nim_buffer_to_4D<char>(nim->data, vol, dims, dim4);
        break;
    case DT_UINT16:
        nim_buffer_to_4D<unsigned short>(nim->data, vol, dims, dim4);
        break;
    case DT_UINT32:
        nim_buffer_to_4D<unsigned int>(nim->data, vol, dims, dim4);
        break;
    default:
        fprintf(stderr, "Datatype: %hd not supported\n",
                        nim->datatype);
        exit(1);
    }
}

// Helper for set_nim_3D: allocate a flat buffer with elements of type D and
// fill it from vol[x][y][z] (x varying fastest, then y, then z), casting
// every element to D.  The element size in bytes is stored in *elem_size.
// The returned buffer comes from malloc(); exits with status 1 if a
// non-empty buffer cannot be allocated.
template <class D, class T> void * pack_3D_for_nim(T *** vol,
                                                   const size_t * dims,
                                                   size_t * elem_size)
{
    const size_t num_bytes = dims[0]*dims[1]*dims[2]*sizeof(D);
    D * packed = (D *)malloc(num_bytes);

    // malloc(0) may legitimately return NULL, so only a failed non-empty
    // request counts as an error
    if (packed == NULL && num_bytes > 0) {
        fprintf(stderr, "Error: unable to allocate %lu bytes\n",
                        (unsigned long)num_bytes);
        exit(1);
    }

    size_t count = 0;
    for (size_t z = 0; z < dims[2]; z++)
        for (size_t y = 0; y < dims[1]; y++)
            for (size_t x = 0; x < dims[0]; x++)
                packed[count++] = (D)vol[x][y][z];

    *elem_size = sizeof(D);
    return packed;
}

// Replace the voxel data of a NIfTI image with the contents of a 3D array.
//
// Parameters:
//   nim  - image to update; nim->datatype selects the element type that is
//          stored
//   vol  - source, indexed vol[x][y][z]
//   dims - the three extents (x, y, z) to store
//
// Every element of vol is cast to the storage type.  Any previous
// nim->data is free()d and replaced by a freshly malloc()ed buffer, and
// nim->nbyper is set to the size of one stored element; no other field of
// nim is modified.  Prints a message and exits with status 1 if the
// datatype is not one of the supported DT_* codes or if the buffer cannot
// be allocated.
template <class T> void set_nim_3D(nifti_image * nim,
                                   T *** vol,
                                   const size_t * dims)
{
    void * packed = NULL;
    size_t elem_size = 0;

    switch (nim->datatype) {
    case DT_BINARY:
        // NOTE(review): stores one bool per voxel, as before; the NIfTI-1
        // header describes DT_BINARY as 1 bit per voxel -- confirm.
        packed = pack_3D_for_nim<bool>(vol, dims, &elem_size);
        break;
    case DT_UINT8:
        packed = pack_3D_for_nim<unsigned char>(vol, dims, &elem_size);
        break;
    case DT_INT16:
        packed = pack_3D_for_nim<short>(vol, dims, &elem_size);
        break;
    case DT_INT32:
        packed = pack_3D_for_nim<int>(vol, dims, &elem_size);
        break;
    case DT_FLOAT32:
        packed = pack_3D_for_nim<float>(vol, dims, &elem_size);
        break;
    case DT_FLOAT64:
        packed = pack_3D_for_nim<double>(vol, dims, &elem_size);
        break;
    case DT_INT8:
        packed = pack_3D_for_nim<char>(vol, dims, &elem_size);
        break;
    case DT_UINT16:
        packed = pack_3D_for_nim<unsigned short>(vol, dims, &elem_size);
        break;
    case DT_UINT32:
        packed = pack_3D_for_nim<unsigned int>(vol, dims, &elem_size);
        break;
    default:
        fprintf(stderr, "Datatype: %hd not supported\n",
                        nim->datatype);
        exit(1);
    }

    // the old buffer is released only after the new one is completely
    // filled, then the new buffer is handed over without another copy
    if (nim->data != NULL) free(nim->data);
    nim->nbyper = elem_size;
    nim->data = packed;
}

// Load a list of NIfTI volumes into one 4D array.
//
// `file` is a text file holding one image filename per line (lines are read
// with a 512-byte buffer).  A line is accepted when the named file can be
// opened for reading and nifti_is_complete_filename() accepts it; other
// non-blank lines are reported on stdout as "Ignoring: ...".  At most 256
// images are accepted; any further ones are skipped with a warning.
//
// Parameters:
//   file    - path of the list file; if it cannot be opened the reason is
//             reported with perror() and no image is loaded
//   dims    - out: the first three dimensions (dim[1..3]) of the first
//             accepted image; every later image must match them
//   num_out - out: number of images loaded
//
// Returns vol, where vol[x][y][z][j] is voxel (x, y, z) of the j-th
// accepted image cast to T.  The array is built from nested malloc() calls
// and is owned by the caller.  Returns NULL (leaving dims untouched) when
// no image was accepted.  Exits with status 1 if an image cannot be read or
// its dimensions differ from the first image.
template <class T> T **** read_4D_textfile(const char * file,
                                           size_t * dims,
                                           size_t * num_out)
{
    const size_t max_files = 256;
    const size_t max_len = 512;
    char fnames[max_files][max_len];
    char line[max_len];
    size_t num = 0;

    FILE * fp = fopen(file, "r");
    if (fp != NULL) {
        while (fgets(line, (int)max_len, fp) != NULL) {

            // remove the newline
            const size_t len = strlen(line);
            if (len > 0 && line[len-1] == '\n')
                line[len-1] = 0;

            // check if the file exists; the probe handle is closed again
            // right away so that it does not leak
            FILE * probe = fopen(line, "r");
            const bool readable = (probe != NULL);
            if (probe != NULL)
                fclose(probe);

            if (readable && nifti_is_complete_filename(line)) {
                // never write past the end of the filename table
                if (num == max_files) {
                    fprintf(stderr,
                            "Warning: %s lists more than %lu images; "
                            "ignoring the rest\n",
                            file, (unsigned long)max_files);
                    break;
                }
                strcpy(fnames[num], line);
                num++;
            } else {
                if (len > 1)
                    fprintf(stdout, "Ignoring: %s\n", line);
            }
        }
        fclose(fp);
    } else {
        perror(file);
    }

    // set the output number
    *num_out = num;

    // nothing was accepted: there is nothing to allocate, fill or free
    if (num == 0)
        return(NULL);

    // temporary 3D volume (data) and the 4D result (vol)
    T *** data = NULL;
    T **** vol = NULL;

    for (size_t j = 0; j < num; j++) {

        // read the nifti file
        nifti_image * nim = nifti_image_read(fnames[j], 1);
        if (nim == NULL) {
            fprintf(stderr, "Error: unable to read %s\n", fnames[j]);
            exit(1);
        }

        if (j == 0) {

            // the first image defines the dimensions
            for (size_t i = 0; i < 3; i++)
                dims[i] = (size_t)nim->dim[i+1];

            // allocate the space
            data = (T ***)malloc(dims[0] * sizeof(*data));
            vol = (T ****)malloc(dims[0] * sizeof(*vol));
            for (size_t x = 0; x < dims[0]; x++) {
                data[x] = (T **)malloc(dims[1] * sizeof(*data[x]));
                vol[x] = (T ***)malloc(dims[1] * sizeof(*vol[x]));
                for (size_t y = 0; y < dims[1]; y++) {
                    data[x][y] = (T *)malloc(dims[2] * sizeof(*data[x][y]));
                    vol[x][y] = (T **)malloc(dims[2] * sizeof(*vol[x][y]));
                    for (size_t z = 0; z < dims[2]; z++)
                        vol[x][y][z] = (T *)malloc(num * sizeof(*vol[x][y][z]));
                }
            }

        } else {
            // every later image has to match the first one
            for (size_t i = 0; i < 3; i++)
                if (dims[i] != (size_t)nim->dim[i+1]) {
                    fprintf(stderr, "Error: atlas dims do not match target\n");
                    exit(1);
                }
        }

        // set the data
        get_nim_3D<T>(nim, data, dims);

        // free the nifti image
        nifti_image_free(nim);

        // copy the data over
        for (size_t x = 0; x < dims[0]; x++)
            for (size_t y = 0; y < dims[1]; y++)
                for (size_t z = 0; z < dims[2]; z++)
                    vol[x][y][z][j] = data[x][y][z];
    }

    // free the temporary data
    for (size_t x = 0; x < dims[0]; x++) {
        for (size_t y = 0; y < dims[1]; y++)
            free(data[x][y]);
        free(data[x]);
    }
    free(data);

    return(vol);
}

// Allocate a 3D array of T that is indexed as result[x][y][z].
//
// d holds the three extents: d[0] (x), d[1] (y) and d[2] (z).  The array is
// built from nested malloc() calls, so the elements are uninitialized and
// allocation failures are not checked.
template <class T> T *** allocate_3D_data(const size_t * d)
{
    T *** volume = (T ***)malloc(d[0] * sizeof(T **));

    size_t ix = 0;
    while (ix < d[0]) {
        // one table of row pointers per x index ...
        T ** plane = (T **)malloc(d[1] * sizeof(T *));
        volume[ix] = plane;

        // ... and one run of d[2] elements per (x, y) pair
        for (size_t iy = 0; iy != d[1]; ++iy)
            plane[iy] = (T *)malloc(d[2] * sizeof(T));

        ++ix;
    }

    return volume;
}

// Allocate a 4D array of T that is indexed as result[x][y][z][v], skipping
// the innermost allocation for masked-out voxels.
//
// Parameters:
//   d      - the three outer extents: d[0] (x), d[1] (y), d[2] (z)
//   d4     - extent of the innermost index v
//   ignore - mask indexed ignore[x][y][z]; where it is true no innermost
//            array is allocated and result[x][y][z] is set to NULL
//
// The array is built from nested malloc() calls, so the elements are
// uninitialized and allocation failures are not checked.
template <class T> T **** allocate_4D_data(const size_t * d,
                                           const size_t d4,
                                           bool *** ignore)
{
    T **** data;
    data = (T ****)malloc(d[0] * sizeof(*data));
    for (size_t x = 0; x < d[0]; x++) {
        data[x] = (T ***)malloc(d[1] * sizeof(*data[x]));
        for (size_t y = 0; y < d[1]; y++) {
            data[x][y] = (T **)malloc(d[2] * sizeof(*data[x][y]));
            for (size_t z = 0; z < d[2]; z++) {
                if (ignore[x][y][z] == false)
                    data[x][y][z] = (T *)malloc(d4 * sizeof(*data[x][y][z]));
                else
                    // keep ignored voxels well-defined so that they can be
                    // tested for and safely passed to free()
                    data[x][y][z] = NULL;
            }
        }
    }
    return(data);
}

// Allocate a 5D array of T that is indexed as result[x][y][z][v][w],
// skipping the two innermost levels for masked-out voxels.
//
// Parameters:
//   d      - the three outer extents: d[0] (x), d[1] (y), d[2] (z)
//   d4     - extent of the fourth index v
//   d5     - extent of the fifth index w
//   ignore - mask indexed ignore[x][y][z]; where it is true nothing is
//            allocated for the voxel and result[x][y][z] is set to NULL
//
// The array is built from nested malloc() calls, so the elements are
// uninitialized and allocation failures are not checked.
template <class T> T ***** allocate_5D_data(const size_t * d,
                                            const size_t d4,
                                            const size_t d5,
                                            bool *** ignore)
{
    T ***** data;
    data = (T *****)malloc(d[0] * sizeof(*data));
    for (size_t x = 0; x < d[0]; x++) {
        data[x] = (T ****)malloc(d[1] * sizeof(*data[x]));
        for (size_t y = 0; y < d[1]; y++) {
            data[x][y] = (T ***)malloc(d[2] * sizeof(*data[x][y]));
            for (size_t z = 0; z < d[2]; z++) {
                if (ignore[x][y][z] == false) {
                    data[x][y][z] = (T **)malloc(d4 * sizeof(*data[x][y][z]));
                    for (size_t v = 0; v < d4; v++)
                        data[x][y][z][v] =
                            (T *)malloc(d5 * sizeof(*data[x][y][z][v]));
                } else {
                    // keep ignored voxels well-defined so that they can be
                    // tested for instead of holding an indeterminate pointer
                    data[x][y][z] = NULL;
                }
            }
        }
    }
    return(data);
}

// Release a 3D array that was built from nested malloc() calls and is
// indexed data[x][y][z].
//
// Parameters:
//   data - array to release; NULL is accepted and ignored, as are NULL
//          entries at any level (mirroring free(NULL))
//   dims - the extents the array was allocated with; only dims[0] (x) and
//          dims[1] (y) are needed to walk the pointer tables
template <class T> void free_3D_data(T *** data,
                                     const size_t * dims)
{
    if (data == NULL)
        return;

    for (size_t x = 0; x < dims[0]; x++) {
        // a missing pointer table has nothing below it to release
        if (data[x] == NULL)
            continue;
        for (size_t y = 0; y < dims[1]; y++)
            free(data[x][y]);
        free(data[x]);
    }
    free(data);
}

// Release a 4D array that was built from nested malloc() calls and is
// indexed data[x][y][z][v]; every innermost array is released.
//
// Parameters:
//   data - array to release; NULL is accepted and ignored, as are NULL
//          entries at any level (mirroring free(NULL))
//   dims - the three outer extents (x, y, z) the array was allocated with
template <class T> void free_4D_data(T **** data,
                                     const size_t * dims)
{
    if (data == NULL)
        return;

    for (size_t x = 0; x < dims[0]; x++) {
        // a missing pointer table has nothing below it to release
        if (data[x] == NULL)
            continue;
        for (size_t y = 0; y < dims[1]; y++) {
            if (data[x][y] == NULL)
                continue;
            for (size_t z = 0; z < dims[2]; z++) {
                free(data[x][y][z]);
            }
            free(data[x][y]);
        }
        free(data[x]);
    }
    free(data);
}

// Release a 4D array that was built from nested malloc() calls and is
// indexed data[x][y][z][v], where masked-out voxels own no innermost array.
//
// Parameters:
//   data   - array to release; NULL is accepted and ignored, as are NULL
//            pointer tables (mirroring free(NULL))
//   dims   - the three outer extents (x, y, z) the array was allocated with
//   ignore - mask indexed ignore[x][y][z]; data[x][y][z] is released only
//            where the mask is false and is never touched where it is true
template <class T> void free_4D_data(T **** data,
                                     const size_t * dims,
                                     bool *** ignore)
{
    if (data == NULL)
        return;

    for (size_t x = 0; x < dims[0]; x++) {
        // a missing pointer table has nothing below it to release
        if (data[x] == NULL)
            continue;
        for (size_t y = 0; y < dims[1]; y++) {
            if (data[x][y] == NULL)
                continue;
            for (size_t z = 0; z < dims[2]; z++) {
                if (ignore[x][y][z] == false)
                    free(data[x][y][z]);
            }
            free(data[x][y]);
        }
        free(data[x]);
    }
    free(data);
}

// Release a 5D array that was built from nested malloc() calls and is
// indexed data[x][y][z][v][w], where masked-out voxels own nothing below
// data[x][y][z].
//
// Parameters:
//   data   - array to release; NULL is accepted and ignored, as are NULL
//            pointer tables (mirroring free(NULL))
//   dims   - the three outer extents (x, y, z) the array was allocated with
//   d4     - extent of the fourth index v
//   ignore - mask indexed ignore[x][y][z]; data[x][y][z] and the arrays
//            below it are released only where the mask is false and are
//            never touched where it is true
template <class T> void free_5D_data(T ***** data,
                                     const size_t * dims,
                                     const size_t d4,
                                     bool *** ignore)
{
    if (data == NULL)
        return;

    for (size_t x = 0; x < dims[0]; x++) {
        // a missing pointer table has nothing below it to release
        if (data[x] == NULL)
            continue;
        for (size_t y = 0; y < dims[1]; y++) {
            if (data[x][y] == NULL)
                continue;
            for (size_t z = 0; z < dims[2]; z++) {
                if (ignore[x][y][z] == false && data[x][y][z] != NULL) {
                    for (size_t v = 0; v < d4; v++)
                        free(data[x][y][z][v]);
                    free(data[x][y][z]);
                }
            }
            free(data[x][y]);
        }
        free(data[x]);
    }
    free(data);
}

// The functions below are only declared in this header; their definitions
// live elsewhere, so the notes on them are assumptions drawn from the
// signatures and should be confirmed against the implementations.

// Returns a 3D intensity volume for `file`.  dims is non-const, so it is
// presumably filled in with the volume's dimensions -- TODO confirm.
intensity_t *** get_target(const char * file,
                           size_t * dims);

// Returns a 3D label volume for `file`.  dims and num_labels are inputs
// (both const); presumably the expected dimensions and label count.
label_t *** get_est(const char * file,
                    const size_t * dims,
                    const size_t num_labels);

// Variant of get_est whose num_labels is a non-const pointer, so it can be
// written by the function -- presumably the label count is reported back.
label_t *** get_est2(const char * file,
                     const size_t * dims,
                     size_t * num_labels);

// Returns a 4D intensity array for `file`.  dims and num_raters are inputs
// (both const); presumably the fourth index runs over the raters.
intensity_t **** get_ims(const char * file,
                         const size_t * dims,
                         const size_t num_raters);

// Returns a 4D label array for `file`.  num_raters and num_labels are
// non-const pointers, so they can be written by the function -- presumably
// the rater and label counts are reported back.
label_t **** get_obs(const char * file,
                     const size_t * dims,
                     size_t * num_raters,
                     size_t * num_labels);

#endif
