
#include "TumorMassEffectGenerator.h"

#include "itkBSplineInterpolateImageFunction.h"
#include "itkLinearInterpolateImageFunction.h"
#include "itkNearestNeighborInterpolateImageFunction.h"

#include "itkImageRegionIteratorWithIndex.h"

#include "itkDiscreteGaussianImageFilter.h"
#include "itkSignedDanielssonDistanceMapImageFilter.h"

#if ITK_VERSION_MAJOR < 4
  #include "itkIterativeInverseDeformationFieldImageFilter.h"
#else
  #include "itkIterativeInverseDisplacementFieldImageFilter.h"
#endif

#include "itkVTKImageExport.h"

#include "itkWarpVectorImageFilter.h"

#include "vtkContourFilter.h"
#include "vtkIdList.h"
#include "vtkImageData.h"
#include "vtkImageImport.h"
#include "vtkPoints.h"
#include "vtkPointLocator.h"
#include "vtkSmartPointer.h"

#include "createMesh3D.h"

#include "vnl/vnl_math.h"
#include "vnl/algo/vnl_determinant.h"

#include "muException.h"
#include "DynArray.h"
#include "Log.h"

#include "MersenneTwisterRNG.h"

#include "ConnectedComponentsFilter.h"
#include "InverseDeformationFilter.h"
#include "SubsampledImageRegionIterator.h"
//#include "VectorMeanImageFilter.h"
#include "VectorBlurImageFilter.h"
#include "VonMisesFisherDistribution.h"

#include <stdexcept>

#define BLUR_DISTANCEMAPS 1

#define DISTANCE_EPS 0.1

#define DO_JACOBI 1

#define SOLVER_AUG_BC 1
#define SOLVER_USE_AVERAGING 0

// Maximum displacement magnitude in one iteration
#define MAX_DISP 50.0

// Default constructor.
// Starts without any mesh attached and assigns the default value of every
// tunable parameter of the simulation.
TumorMassEffectGenerator
::TumorMassEffectGenerator()
{
  // No meshes yet
  m_InitialVTKMesh = 0;
  m_InitialMesh = 0;
  m_CurrentMesh = 0;

  // Elasticity matrices, built from (E, nu) pairs
  this->SetFalxMaterialParameters(200000.0, 0.4);
  this->SetBrainMaterialParameters(694.0, 0.4);

  // Loading: pressure magnitude and concentration of the von Mises-Fisher
  // distribution used to perturb the force directions
  m_Pressure = 1e-3;
  m_VMFKappa = 50.0;

  // Meshing: merge tolerance of the point locator and mesher selection
  m_PointTolerance = 0.5;
  m_UseQHull = true;

  // Iteration counts, damping and threading
  m_DeformationIterations = 5;
  m_DeformationSolverIterations = 10;
  m_Damping = 0.95;
  m_NumberOfThreads = 2;
}

// Destructor: deletes the initial and the current tetrahedral mesh objects.
// m_InitialVTKMesh is not released here.
// NOTE(review): presumably m_InitialVTKMesh is owned by the caller, and
// m_InitialMesh / m_CurrentMesh never alias each other -- confirm.
TumorMassEffectGenerator
::~TumorMassEffectGenerator()
{
  delete m_InitialMesh;
  delete m_CurrentMesh;
}

// Builds the 6x6 isotropic linear elasticity matrix for the given Young's
// modulus E and Poisson ratio nu.
//
// nu is clamped to [0, 0.499999] so that the (1 - 2 nu) denominator stays
// away from zero, and E is clamped from below at 1e-20.
//
// Layout of the result: the upper-left 3x3 block carries lambda + 2 mu on
// the diagonal and lambda elsewhere; the lower three diagonal entries carry
// mu; everything else is zero.
TumorMassEffectGenerator::MatrixType
TumorMassEffectGenerator
::BuildElasticityMatrix(float E, float nu)
{
  // Keep the material parameters in a numerically usable range
  if (nu < 0)
    nu = 0;
  else if (nu >= 0.499999)
    nu = 0.499999;

  if (E < 1e-20)
    E = 1e-20;

  // Lame parameters
  const float lambda = nu*E / ((1.0+nu)*(1-2.0*nu));
  const float mu = E / (2.0 * (1.0 + nu));

  MatrixType elasticity(6, 6, 0.0);

  for (int row = 0; row < 6; row++)
  {
    if (row >= 3)
    {
      // Shear part
      elasticity(row, row) = mu;
      continue;
    }

    // Normal part: diagonal entry plus coupling with the other two axes
    elasticity(row, row) = lambda + 2.0*mu;
    for (int col = 0; col < 3; col++)
    {
      if (col != row)
        elasticity(row, col) = lambda;
    }
  }

  return elasticity;
}

void
TumorMassEffectGenerator
::SetBrainMaterialParameters(float E, float nu)
{
  m_BrainElasticityMatrix = this->BuildElasticityMatrix(E, nu);
  m_DuraElasticityMatrix = m_BrainElasticityMatrix;
}

void
TumorMassEffectGenerator
::SetFalxMaterialParameters(float E, float nu)
{
  m_FalxElasticityMatrix = this->BuildElasticityMatrix(E, nu);
}

// Stores the label image and precomputes m_BrainDistanceImage, the signed
// distance to the boundary of the region covered by any non-zero label
// (negative inside, positive outside). When BLUR_DISTANCEMAPS is enabled
// the distance map is smoothed with a Gaussian of variance 1.0.
//
// labelImg: label image; every non-zero voxel is treated as brain.
void
TumorMassEffectGenerator
::SetLabelImage(ByteImagePointer labelImg)
{
  typedef itk::SignedDanielssonDistanceMapImageFilter<
    ByteImageType, FloatImageType> DistanceMapFilterType;

  typedef itk::DiscreteGaussianImageFilter<
    FloatImageType, FloatImageType> SmootherType;

  typedef itk::ImageRegionIteratorWithIndex<ByteImageType>
    LabelIteratorType;

  m_LabelImage = labelImg;

  // Binary mask of all labeled voxels (wm, gm, falx, tumor, ...)
  ByteImagePointer brainMask = ByteImageType::New();
  brainMask->CopyInformation(m_LabelImage);
  brainMask->SetRegions(m_LabelImage->GetLargestPossibleRegion());
  brainMask->Allocate();
  brainMask->FillBuffer(0);

  // Walk the same region the mask was allocated for; iterating only the
  // requested region could leave part of the mask unfilled.
  LabelIteratorType labelIt(
    m_LabelImage, m_LabelImage->GetLargestPossibleRegion());

  for (labelIt.GoToBegin(); !labelIt.IsAtEnd(); ++labelIt)
  {
    if (labelIt.Get() != 0)
      brainMask->SetPixel(labelIt.GetIndex(), 1);
  }

  // TODO: morphological closing on mask

  // Signed distance to the mask boundary, in physical units
  DistanceMapFilterType::Pointer distanceMapFilter =
    DistanceMapFilterType::New();

  distanceMapFilter->InsideIsPositiveOff();
  distanceMapFilter->SetInput(brainMask);
  distanceMapFilter->SquaredDistanceOff();
  distanceMapFilter->UseImageSpacingOn();

  distanceMapFilter->Update();

#if BLUR_DISTANCEMAPS
  SmootherType::Pointer smoother = SmootherType::New();
  smoother->SetInput(distanceMapFilter->GetDistanceMap());
  smoother->SetVariance(1.0);
  smoother->Update();

  m_BrainDistanceImage = smoother->GetOutput();
#else
  m_BrainDistanceImage = distanceMapFilter->GetDistanceMap();
#endif
}

// Returns the current value of m_ElementCounter and advances the counter
// by one. The read-and-increment is performed while holding m_Mutex.
unsigned int
TumorMassEffectGenerator
::GetNextElement()
{
  m_Mutex.Lock();
  const unsigned int claimed = m_ElementCounter;
  ++m_ElementCounter;
  m_Mutex.Unlock();

  return claimed;
}

void
TumorMassEffectGenerator
::ModifyInitialMesh()
{
  unsigned int numPoints = m_InitialVTKMesh->GetNumberOfPoints();

  FloatImageSizeType size = m_LabelImage->GetLargestPossibleRegion().GetSize();
  FloatImageSpacingType spacing = m_LabelImage->GetSpacing();

  vtkSmartPointer<vtkPoints> modifiedPoints = vtkSmartPointer<vtkPoints>::New();
  modifiedPoints->Allocate(numPoints);

  double bounds[6];
  for (int dim = 0; dim < 3; dim++)
  {
    bounds[2*dim] = 0.0;
    bounds[2*dim + 1] = size[dim]*spacing[dim];
  }

  vtkSmartPointer<vtkPointLocator> pLoc =
    vtkSmartPointer<vtkPointLocator>::New();
  pLoc->SetTolerance(m_PointTolerance);
  pLoc->InitPointInsertion(modifiedPoints, bounds, numPoints);

  // Compute distance function for tumor
  typedef itk::SignedDanielssonDistanceMapImageFilter<
    //ByteImageType, FloatImageType> DistanceMapFilterType;
    FloatImageType, FloatImageType> DistanceMapFilterType;

  typedef itk::DiscreteGaussianImageFilter<
    FloatImageType, FloatImageType> SmootherType;

  typedef itk::BSplineInterpolateImageFunction<FloatImageType, double>
    InterpolatorType;

  //ByteImagePointer tumorMask = ByteImageType::New();
  FloatImagePointer tumorMask = FloatImageType::New();
  tumorMask->CopyInformation(m_LabelImage);
  tumorMask->SetRegions(m_LabelImage->GetLargestPossibleRegion());
  tumorMask->Allocate();
  tumorMask->FillBuffer(0);

  typedef itk::ImageRegionIteratorWithIndex<ByteImageType>
    LabelIteratorType;

  LabelIteratorType labelIt(
    m_LabelImage, m_LabelImage->GetLargestPossibleRegion());

  bool got_tumor = false;

  labelIt.GoToBegin();
  while(!labelIt.IsAtEnd())
  {
    unsigned char c = labelIt.Get();
    if (c == 5)
    {
      tumorMask->SetPixel(labelIt.GetIndex(), 1);
      got_tumor = true;
    }
    ++labelIt;
  }

  DistanceMapFilterType::Pointer distanceMapFilter =
    DistanceMapFilterType::New();

  distanceMapFilter->InsideIsPositiveOff();
  distanceMapFilter->SetInput(tumorMask);
  distanceMapFilter->SquaredDistanceOff();
  distanceMapFilter->UseImageSpacingOn();
  
  distanceMapFilter->Update();

#if BLUR_DISTANCEMAPS
  SmootherType::Pointer smoother = SmootherType::New();
  smoother->SetInput(distanceMapFilter->GetDistanceMap());
  smoother->SetVariance(1.0);
  smoother->Update();

  FloatImagePointer distMap = smoother->GetOutput();
#else
  FloatImagePointer distMap = distanceMapFilter->GetDistanceMap();
#endif

  if (!got_tumor)
  {
    muLogMacro(<< ">->->- WARNING: no tumor region detected\n");
    distMap->FillBuffer(1000);
  }

  InterpolatorType::Pointer distMapInterp = InterpolatorType::New();
  distMapInterp->SetInputImage(distMap);
  distMapInterp->SetSplineOrder(3);

  double x[3];
 
  // Insert non-tumor points
  for (unsigned int i = 0; i < numPoints; i++)
  {
    m_InitialVTKMesh->GetPoint(i, x);

    FloatImagePointType p;
    p[0] = x[0];
    p[1] = x[1];
    p[2] = x[2];

    if (!distMapInterp->IsInsideBuffer(p))
      continue;

    float phi = distMapInterp->Evaluate(p);

    if (phi <= DISTANCE_EPS)
      continue;

    vtkIdType id = 0;
    pLoc->InsertUniquePoint(x, id);
  }

  // Process grid and insert inner tumor boundary points
  //typedef SubsampledImageRegionIterator<FloatImageType> IteratorType;
  //IteratorType distIt(distMap, distMap->GetLargestPossibleRegion(), 2.0);
  typedef itk::ImageRegionIteratorWithIndex<FloatImageType> IteratorType;
  IteratorType distIt(distMap, distMap->GetLargestPossibleRegion());

  distIt.GoToBegin();
  while (!distIt.IsAtEnd())
  {
    float phi = distIt.Get();

    FloatImageIndexType ind = distIt.GetIndex();

    FloatImagePointType p;
    distMap->TransformIndexToPhysicalPoint(ind, p);

    //if (phi <= 0) // Insert all internal tumor points?
    if (phi < 0.0 && phi > (-2.0*DISTANCE_EPS))
    {
      vtkIdType id = -1;

      x[0] = p[0];
      x[1] = p[1];
      x[2] = p[2];
      pLoc->InsertUniquePoint(x, id);
    }

    ++distIt;
  }

  typedef itk::VTKImageExport<FloatImageType> ITKExportType;
  ITKExportType::Pointer itkexport = ITKExportType::New();
  itkexport->SetInput(tumorMask);
  itkexport->Update();
  
  // See InsightApplications/Auxialiary/vtk/itkImageToVTKImageFilter
  vtkImageImport* vtkimport = vtkImageImport::New();
  vtkimport->SetUpdateInformationCallback(
    itkexport->GetUpdateInformationCallback());
  vtkimport->SetPipelineModifiedCallback(
    itkexport->GetPipelineModifiedCallback());
  vtkimport->SetWholeExtentCallback(itkexport->GetWholeExtentCallback());
  vtkimport->SetSpacingCallback(itkexport->GetSpacingCallback());
  vtkimport->SetOriginCallback(itkexport->GetOriginCallback());
  vtkimport->SetScalarTypeCallback(itkexport->GetScalarTypeCallback());
  vtkimport->SetNumberOfComponentsCallback(itkexport->GetNumberOfComponentsCallback());
  vtkimport->SetPropagateUpdateExtentCallback(itkexport->GetPropagateUpdateExtentCallback());
  vtkimport->SetUpdateDataCallback(itkexport->GetUpdateDataCallback());
  vtkimport->SetDataExtentCallback(itkexport->GetDataExtentCallback());
  vtkimport->SetBufferPointerCallback(itkexport->GetBufferPointerCallback());
  vtkimport->SetCallbackUserData(itkexport->GetCallbackUserData());

  vtkSmartPointer<vtkContourFilter> contourf =
    vtkSmartPointer<vtkContourFilter>::New();
  contourf->SetInput(vtkimport->GetOutput());
  contourf->SetNumberOfContours(1);
  contourf->SetValue(0, 1.0);
  contourf->ComputeNormalsOff();
  contourf->ComputeGradientsOff();

  contourf->Update();

  vtkPolyData* contourPD = contourf->GetOutput();

  for (vtkIdType k = 0; k < contourPD->GetNumberOfPoints(); k++)
  { 
    double y[3];
    contourPD->GetPoint(k, y);

    vtkIdType id = -1;
    pLoc->InsertUniquePoint(y, id);
  }

/*
  // Insert tumor centroids
  // Can be useful for infil simulation phase
  for (unsigned int i = 0; i < m_TumorCentroids.GetSize(); i++)
  {
    vtkIdType id = -1;

    VectorType centroid = m_TumorCentroids[i];

    double c[3];
    c[0] = centroid[0];
    c[1] = centroid[1];
    c[2] = centroid[2];

    pLoc->InsertUniquePoint(c, id);
  }
*/

  // Retesselation of the points, with tumor points added
  delete m_InitialMesh;
  m_InitialMesh = new LinearTetrahedralMesh();
  m_InitialMesh->SetVTKMesh(
    createMesh3D(pLoc->GetPoints(), m_BrainDistanceImage, m_UseQHull) );

}

// Interpolates the nodal values stored in m_NodeCompSolutions at the
// location x, using the linear shape functions of the initial mesh.
//
// The containing tetrahedron is searched among the elements attached to the
// (up to four) mesh nodes closest to x.
//
// x: query location, at least three components.
// Returns a 3-vector; it is all zeros when no containing element is found,
// and any NaN / infinite component is replaced by zero.
TumorMassEffectGenerator::VectorType
TumorMassEffectGenerator
::ComputeSolution(const VectorType& x)
{
  VectorType sol(3, 0);

  unsigned int numPoints = m_InitialMesh->GetNumberOfPoints();
  if (numPoints == 0)
    return sol;

  vtkUnstructuredGrid* initialVTKMesh = m_InitialMesh->GetVTKMesh();

  // Find closest point (uses Kd tree)
  LinearTetrahedralMesh::PointType q;
  for (unsigned int dim = 0; dim < 3; dim++)
    q[dim] = x[dim];

  // Never request (or index) more neighbors than the mesh has nodes
  unsigned int numSearchPts = 4;
  if (numSearchPts > numPoints)
    numSearchPts = numPoints;

  LinearTetrahedralMesh::PointIDVectorType closestPoints =
    m_InitialMesh->FindClosestPoints(q, numSearchPts);

  // Id lists are refilled by each VTK query, so one pair is enough
  vtkSmartPointer<vtkIdList> cellIds = vtkSmartPointer<vtkIdList>::New();
  vtkSmartPointer<vtkIdList> ptIds = vtkSmartPointer<vtkIdList>::New();

  // Only consider elements that contain the closest point/node
  bool foundEl = false;

  for (unsigned int i = 0; i < numSearchPts && !foundEl; i++)
  {
    unsigned int closestPt = closestPoints[i];

    initialVTKMesh->GetPointCells(closestPt, cellIds);

    for (vtkIdType j = 0; j < cellIds->GetNumberOfIds(); j++)
    {
      unsigned int el = cellIds->GetId(j);

      // Skip anything that is not a 4-node element before its shape
      // functions are indexed as N[0..3]
      initialVTKMesh->GetCellPoints(el, ptIds);

      if (ptIds->GetNumberOfIds() != 4)
        continue;

      VectorType N = m_InitialMesh->ComputeShapeFunctions(el, x);

      // Inside/outside test: all shape functions must be (nearly)
      // non-negative
      float minN = N[0];
      for (int k = 1; k < 4; k++)
      {
        if (N[k] < minN)
          minN = N[k];
      }
      if (minN < -0.001)
        continue;

      for (int k = 0; k < 4; k++)
        sol += m_NodeCompSolutions.get_row(ptIds->GetId(k)) * N[k];

      // A global point can only be inside one tetrahedron
      foundEl = true;
      break;
    }

  } // for i

  // Sanity checks
  for (int dim = 0; dim < 3; dim++)
  {
    if (vnl_math_isnan(sol[dim]))
      sol[dim] = 0.0;
    if (vnl_math_isinf(sol[dim]))
      sol[dim] = 0.0;
  }

  return sol;

}

// Fills `solutions` (one row per node of the current mesh, three columns)
// with the nodal forces produced by the tumor pressure surfaces.
//
// For every triangle in m_TumorPressureSurfaces a direction is drawn from a
// von Mises-Fisher distribution centered on the triangle normal, scaled by
// m_Pressure times the triangle area, and one third of that force is added
// to each corner node. Afterwards the nodal forces are smoothed by a
// volume-weighted average over the tetrahedra attached to each node.
//
// solutions: output, resized to (number of nodes) x 3 and overwritten.
void
TumorMassEffectGenerator
::ComputeForces(MatrixType& solutions)
{
  muLogMacro(<< "    Computing surface forces with pressure = "
    << m_Pressure*1e+3 << " kPa\n");

  const unsigned int nodeCount = m_CurrentMesh->GetNumberOfPoints();
  const unsigned int elementCount = m_CurrentMesh->GetNumberOfElements();

  const unsigned int surfaceCount = m_TumorPressureSurfaces.GetSize();

  vtkSmartPointer<vtkUnstructuredGrid> mesh =
    m_CurrentMesh->GetVTKMesh();

  solutions.set_size(nodeCount, 3);
  solutions.fill(0.0);

  for (unsigned int s = 0; s < surfaceCount; s++)
  {
    unsigned int* corners = m_TumorPressureSurfaces[s].ids;

    //
    // Triangle normal from the cross product of two edges
    //

    double xyz[3][3];
    for (int v = 0; v < 3; v++)
      mesh->GetPoint(corners[v], xyz[v]);

    VectorType edgeA(3);
    VectorType edgeB(3);
    for (int d = 0; d < 3; d++)
    {
      edgeA[d] = xyz[1][d] - xyz[0][d];
      edgeB[d] = xyz[2][d] - xyz[0][d];
    }

    VectorType normal(3);
    normal[0] = edgeA[1]*edgeB[2] - edgeA[2]*edgeB[1];
    normal[1] = edgeA[2]*edgeB[0] - edgeA[0]*edgeB[2];
    normal[2] = edgeA[0]*edgeB[1] - edgeA[1]*edgeB[0];

    const float normalLength = normal.magnitude();

    // Degenerate triangle: no direction, no force
    if (normalLength < 1e-20)
      continue;

    const float area = 0.5 * normalLength;

    normal /= normalLength;

    // Randomly perturbed force direction around the normal
    VonMisesFisherDistribution vmf(normal, m_VMFKappa);
    VectorType sampledDir = vmf.Generate();

    VectorType share = sampledDir * (m_Pressure * area);

    // Each of the three corners receives the same share of the force
    share /= 3.0;

    for (int v = 0; v < 3; v++)
    {
      const unsigned int node = corners[v];
      for (int d = 0; d < 3; d++)
        solutions(node, d) += share[d];
    }

  } // for each surf

/*
  // DEBUG: make sure no forces acting outward of sliding BC
  typedef itk::BSplineInterpolateImageFunction<FloatImageType, double>
    InterpolatorType;
  InterpolatorType::Pointer brainDistInterp = InterpolatorType::New();
  brainDistInterp->SetInputImage(m_BrainDistanceImage);

  for (unsigned int k = 0; k < numPoints; k++)
  {
    if (m_NodeBCMarkers[k] != SlidingBC)
      continue;

    VectorType f_k = solutions.get_row(k);

    // Compute normal at node point
    double x[3];
    m_CurrentMesh->GetVTKMesh()->GetPoint(k, x);

    FloatImagePointType p;
    p[0] = x[0];
    p[1] = x[1];
    p[2] = x[2];

    if (!brainDistInterp->IsInsideBuffer(p))
      continue;

    // Obtain gradient
    InterpolatorType::CovariantVectorType gradPhi =
      brainDistInterp->EvaluateDerivative(p);

    // Normalize gradient
    float norm = gradPhi.GetNorm();
    if (norm < 1e-20)
    {
      // Zero force if normal is undefined
      for (int dim = 0; dim < 3; dim++)
        solutions(k, dim) = 0.0;
      continue;
    }
    gradPhi /= norm;

    // Subtract the component along normal direction
    float dotProduct = 0;
    for (int dim = 0; dim < 3; dim++)
      dotProduct += f_k[dim] * gradPhi[dim];
    for (int dim = 0; dim < 3; dim++)
      f_k[dim] -= dotProduct * gradPhi[dim];

    solutions.set_row(k, f_k);
  }
*/

  // Laplacian-type smoothing: every tetrahedron contributes its mean nodal
  // force, weighted by its volume, to each of its four nodes.
  MatrixType weightedSum(nodeCount, 3, 0.0);
  VectorType weightTotal(nodeCount, 1e-10); // small offset avoids 0/0

  vtkSmartPointer<vtkIdList> elNodes = vtkSmartPointer<vtkIdList>::New();

  for (unsigned int el = 0; el < elementCount; el++)
  {
    mesh->GetCellPoints(el, elNodes);

    if (elNodes->GetNumberOfIds() != 4)
      continue;

    const float vol = m_CurrentMesh->ComputeElementVolume(el);

    VectorType elForce(3, 0.0);
    for (unsigned int n = 0; n < 4; n++)
      elForce += solutions.get_row(elNodes->GetId(n));

    elForce /= 4.0;
    elForce *= vol;

    for (unsigned int n = 0; n < 4; n++)
    {
      const unsigned int node = elNodes->GetId(n);
      for (unsigned int d = 0; d < 3; d++)
        weightedSum(node, d) += elForce[d];
      weightTotal[node] += vol;
    }
  }

  for (unsigned int node = 0; node < nodeCount; node++)
    solutions.set_row(node, weightedSum.get_row(node) / weightTotal[node]);

}

void
TumorMassEffectGenerator
::ComputeTumorCentroids()
{
  typedef itk::ImageRegionIteratorWithIndex<ByteImageType>
    LabelIterator;

  LabelIterator labelIt(
    m_LabelImage, m_LabelImage->GetLargestPossibleRegion());

  ByteImagePointer tumorMask = ByteImageType::New();
  tumorMask->CopyInformation(m_LabelImage);
  tumorMask->SetRegions(m_LabelImage->GetLargestPossibleRegion());
  tumorMask->Allocate();
  tumorMask->FillBuffer(0);

  for (labelIt.GoToBegin(); !labelIt.IsAtEnd(); ++labelIt)
  {
    unsigned char label = labelIt.Get();
    if (label == 5)
      tumorMask->SetPixel(labelIt.GetIndex(), 1);
  }

  typedef ConnectedComponentsFilter<ByteImageType, ByteImageType> CCType;
  CCType::Pointer ccfilt = CCType::New();
  ccfilt->SetInput(tumorMask);
  ccfilt->Update();

  unsigned int numSeeds = 0;

  ByteImagePointer ccImg = ccfilt->GetOutput();
  for (labelIt.GoToBegin(); !labelIt.IsAtEnd(); ++labelIt)
  {
    ByteImageIndexType ind = labelIt.GetIndex();
    unsigned int c = ccImg->GetPixel(ind);
    if (c > numSeeds)
      numSeeds = c;
  }

  //if (numSeeds == 0)
  //  muExceptionMacro(<< "No tumor seed component detected");

//std::cout << "Detected " << numSeeds << " separate tumor seed components" << std::endl;

  m_TumorCentroids.Clear();
  for (unsigned int s = 1; s <= numSeeds; s++)
  {
    VectorType center(3, 0.0);

    unsigned int count = 0;

    for (labelIt.GoToBegin(); !labelIt.IsAtEnd(); ++labelIt)
    {
      ByteImageIndexType ind = labelIt.GetIndex();
      unsigned int c = ccImg->GetPixel(ind);
      if (c == s)
      {
        ByteImagePointType p;
        m_LabelImage->TransformIndexToPhysicalPoint(ind, p);

        for (unsigned int k = 0; k < 3; k++)
          center[k] += p[k];
        count++;
      }
    }

    if (count == 0)
      continue;

    center /= count;
    m_TumorCentroids.Append(center);

  }

}

void
TumorMassEffectGenerator
::DetermineBCs()
{

//std::cout << "Determining node BCs..." << std::endl;

  unsigned int numPoints = m_CurrentMesh->GetNumberOfPoints();
  unsigned int numElements = m_CurrentMesh->GetNumberOfElements();

  vtkUnstructuredGrid* currVTKMesh = m_CurrentMesh->GetVTKMesh();

  typedef itk::SignedDanielssonDistanceMapImageFilter<
    ByteImageType, FloatImageType> DistanceMapFilterType;

  typedef itk::DiscreteGaussianImageFilter<
    FloatImageType, FloatImageType> SmootherType;

  typedef itk::NearestNeighborInterpolateImageFunction<ByteImageType, double>
    LabelInterpolatorType;

  typedef itk::BSplineInterpolateImageFunction<FloatImageType, double>
    InterpolatorType;

  typedef itk::ImageRegionIteratorWithIndex<ByteImageType>
    LabelIterator;

  LabelIterator labelIt(m_LabelImage, m_LabelImage->GetRequestedRegion());

  m_NodeLabels.Clear();
  m_NodeLabels.Initialize(numPoints, 0);

  LabelInterpolatorType::Pointer labelInterp = LabelInterpolatorType::New();
  labelInterp->SetInputImage(m_LabelImage);

  for (unsigned int i = 0; i < numPoints; i++)
  {
    double x[3];
    currVTKMesh->GetPoint(i, x);

    ByteImagePointType p;
    p[0] = x[0];
    p[1] = x[1];
    p[2] = x[2];

    if (!labelInterp->IsInsideBuffer(p))
    {
      m_NodeLabels[i] = 0;
      continue;
    }

    unsigned int c = (unsigned int)labelInterp->Evaluate(p);
    //if (c > 5)
    //  muExceptionMacro(<< "Unexpected label value " << c << "\n");
    m_NodeLabels[i] = c;
  }

  // Mark all points as being internal
  m_NodeBCMarkers = std::vector<BCEnumType>(numPoints, Internal);

  // Compute distance functions to brain tissue (wm, gm, falx, tumor)
  // Mark as sliding BC
  {
    InterpolatorType::Pointer distMapInterp = InterpolatorType::New();
    distMapInterp->SetInputImage(m_BrainDistanceImage);
    distMapInterp->SetSplineOrder(3);

    // Test each node and see if distance to boundary < eps
    double x[3];
    for (unsigned int k = 0; k < numPoints; k++)
    {
      currVTKMesh->GetPoint(k, x);

      FloatImagePointType p;
      p[0] = x[0];
      p[1] = x[1];
      p[2] = x[2];

      if (!labelInterp->IsInsideBuffer(p))
      {
        m_NodeBCMarkers[k] = FixedBC;
        continue;
      }

      unsigned int label = (unsigned int)labelInterp->Evaluate(p);
      //if (label > 5)
      //  muExceptionMacro(<< "Unexpected label value " << label << "\n");
      if (label == 3)
        continue;

      float phi = distMapInterp->Evaluate(p);

      // Fix points outside the brain
      if (phi > DISTANCE_EPS)
        m_NodeBCMarkers[k] = FixedBC;

      // Sliding BC at points near brain boundary
      if (fabs(phi) <= DISTANCE_EPS)
        m_NodeBCMarkers[k] = SlidingBC;
    }

  }

  // NOTE: No need, handled above?
  {
    ByteImagePointer ventricleMask = ByteImageType::New();
    ventricleMask->CopyInformation(m_LabelImage);
    ventricleMask->SetRegions(m_LabelImage->GetLargestPossibleRegion());
    ventricleMask->Allocate();
    ventricleMask->FillBuffer(0);

    labelIt.GoToBegin();
    while(!labelIt.IsAtEnd())
    {
      unsigned char c = labelIt.Get();
      if (c == 3)
        ventricleMask->SetPixel(labelIt.GetIndex(), 1);
      ++labelIt;
    }

    DistanceMapFilterType::Pointer distanceMapFilter =
      DistanceMapFilterType::New();

    distanceMapFilter->InsideIsPositiveOff();
    distanceMapFilter->SetInput(ventricleMask);
    distanceMapFilter->SquaredDistanceOff();
    distanceMapFilter->UseImageSpacingOn();
    
    distanceMapFilter->Update();

#if BLUR_DISTANCEMAPS
    SmootherType::Pointer smoother = SmootherType::New();
    smoother->SetInput(distanceMapFilter->GetDistanceMap());
    smoother->SetVariance(1.0);
    smoother->Update();

    FloatImagePointer ventDistMap = smoother->GetOutput();
#else
    FloatImagePointer ventDistMap = distanceMapFilter->GetDistanceMap();
#endif

    InterpolatorType::Pointer distMapInterp = InterpolatorType::New();
    distMapInterp->SetInputImage(ventDistMap);
    distMapInterp->SetSplineOrder(3);

    // Test each node and see if distance to boundary < eps
    double x[3];
    for (unsigned int k = 0; k < numPoints; k++)
    {
      currVTKMesh->GetPoint(k, x);

      FloatImagePointType p;
      p[0] = x[0];
      p[1] = x[1];
      p[2] = x[2];

      if (!labelInterp->IsInsideBuffer(p))
      {
        continue;
      }

      unsigned int label = (unsigned int)labelInterp->Evaluate(p);
      //if (label > 5)
      //  muExceptionMacro(<< "Unexpected label value " << label << "\n");
      if (label == 3)
        continue;

      float phi = distMapInterp->Evaluate(p);

      // No BC at points near ventricles
      if (fabs(phi) <= DISTANCE_EPS)
        m_NodeBCMarkers[k] = Internal;
    }
  }

  // Find the tumor pressure BCs, along with the triangle surfaces
  m_TumorPressureSurfaces.Clear();

  // Compute distance function for tumor
  {
    ByteImagePointer labelMask = ByteImageType::New();
    labelMask->CopyInformation(m_LabelImage);
    labelMask->SetRegions(m_LabelImage->GetLargestPossibleRegion());
    labelMask->Allocate();
    labelMask->FillBuffer(0);

    labelIt.GoToBegin();
    while(!labelIt.IsAtEnd())
    {
      unsigned char c = labelIt.Get();
      if (c == 5)
        labelMask->SetPixel(labelIt.GetIndex(), 1);
      ++labelIt;
    }

    DistanceMapFilterType::Pointer distanceMapFilter =
      DistanceMapFilterType::New();

    distanceMapFilter->InsideIsPositiveOff();
    distanceMapFilter->SetInput(labelMask);
    distanceMapFilter->SquaredDistanceOff();
    distanceMapFilter->UseImageSpacingOn();
    
    distanceMapFilter->Update();

#if BLUR_DISTANCEMAPS
    SmootherType::Pointer smoother = SmootherType::New();
    smoother->SetInput(distanceMapFilter->GetDistanceMap());
    smoother->SetVariance(1.0);
    smoother->Update();

    InterpolatorType::Pointer distMapInterp = InterpolatorType::New();
    distMapInterp->SetInputImage(smoother->GetOutput());
    distMapInterp->SetSplineOrder(3);
#else
    InterpolatorType::Pointer distMapInterp = InterpolatorType::New();
    distMapInterp->SetInputImage(distanceMapFilter->GetDistanceMap());
    distMapInterp->SetSplineOrder(3);
#endif

    double pt[3];

    // Go  through each element and test the four surfaces
    for (unsigned int el = 0; el < numElements; el++)
    {
      vtkSmartPointer<vtkIdList> ptIds = vtkSmartPointer<vtkIdList>::New();

      currVTKMesh->GetCellPoints(el, ptIds);

      if (ptIds->GetNumberOfIds() != 4)
        continue;

      unsigned int numBoundaryPt = 0;

      unsigned char boundMarks[] = {0, 0, 0, 0};

      for (unsigned int i = 0; i < 4; i++)
      {
        currVTKMesh->GetPoint(ptIds->GetId(i), pt);

        FloatImagePointType p;
        for (unsigned int j = 0; j < 3; j++)
          p[j] = pt[j];

        if (!distMapInterp->IsInsideBuffer(p))
          continue;

        float phi = distMapInterp->Evaluate(p);

        if (fabs(phi) <= DISTANCE_EPS)
        {
          unsigned int id = ptIds->GetId(i);
          if (m_NodeBCMarkers[id] == FixedBC
              || m_NodeBCMarkers[id] == SlidingBC)
            continue;
          m_NodeBCMarkers[id] = PressureBC;
          boundMarks[i] = 1;
          numBoundaryPt++;
        }
      }

      // To get a pressure surface we need three points on boundary
      if (numBoundaryPt != 3)
        continue;

      // Insert boundary surfaces
      for (unsigned int i = 0; i < 4; i++)
        for (unsigned int j = i+1; j < 4; j++)
          for (unsigned int k = j+1; k < 4; k++)
          {
            if (boundMarks[i] == 0 || boundMarks[j] == 0 || boundMarks[k] == 0)
              continue;

            TriangleSurface tri;
            tri.ids[0] = ptIds->GetId(i);
            tri.ids[1] = ptIds->GetId(j);
            tri.ids[2] = ptIds->GetId(k);

            double x_i[3];
            double x_j[3];
            double x_k[3];
            currVTKMesh->GetPoint(ptIds->GetId(i), x_i);
            currVTKMesh->GetPoint(ptIds->GetId(j), x_j);
            currVTKMesh->GetPoint(ptIds->GetId(k), x_k);

            VectorType a(3);
            VectorType b(3);
            for (int d = 0; d < 3; d++)
            {
              a[d] = x_j[d] - x_i[d];
              b[d] = x_k[d] - x_i[d];
            }

            VectorType crossp(3);
            crossp[0] = a[1]*b[2] - a[2]*b[1];
            crossp[1] = a[2]*b[0] - a[0]*b[2];
            crossp[2] = a[0]*b[1] - a[1]*b[0];

            //VectorType dir(3);
            //for (int d = 0; d < 3; d++)
            //  dir[d] = (x_i[d]+x_j[d]+x_k[d])/3.0 - m_TumorCentroid[d];

            VectorType x_mu(3, 0.0);
            for (int d = 0; d < 3; d++)
              x_mu[d] = (x_i[d]+x_j[d]+x_k[d])/3.0;

            VectorType dir = x_mu - m_TumorCentroids[0];
            float minCenterDist = dir.magnitude();
            for (unsigned int c = 1; c < m_TumorCentroids.GetSize(); c++)
            {
              VectorType dir_c = x_mu - m_TumorCentroids[c];
              float dist_c = dir_c.magnitude();
              if (dist_c < minCenterDist)
              {
                dir = dir_c;
                minCenterDist = dist_c;
              }
            }

            //InterpolatorType::CovariantVectorType gradphi =
            //  distMapInterp->EvaluateDerivative(x_mu);

            float dotp = 0;
            for (int d = 0; d < 3; d++)
              dotp += dir[d] * crossp[d];

            // Swap surface point ordering to make sure we get outward normal
            if (dotp < 0.0)
            {
              unsigned int temp_id = tri.ids[1];
              tri.ids[1] = tri.ids[2];
              tri.ids[2] = temp_id;
            }

            m_NodeBCMarkers[ptIds->GetId(i)] = PressureBC;
            m_NodeBCMarkers[ptIds->GetId(j)] = PressureBC;
            m_NodeBCMarkers[ptIds->GetId(k)] = PressureBC;

            m_TumorPressureSurfaces.Append(tri);
          }

    } // for el
  }

//std::cout << "# of pressure surfaces = " << m_TumorPressureSurfaces.GetSize() << std::endl;

}

void
TumorMassEffectGenerator
::UpdateSolutionsJacobi()
{

  unsigned int numPoints = m_CurrentMesh->GetNumberOfPoints();
  unsigned int numElements = m_CurrentMesh->GetNumberOfElements();

  // Initialize solution with zero displacements
  m_NodeSolutions = MatrixType(numPoints, 3, 0.0);

//std::cout << "Element volumes: \n" << elementVolumes << std::endl;

  // TODO: mark nodes based on orig labels
  // find csf and falx markers use appropriate mod and Poisson (near zero?)

  // Recompute element labeling, since tetrahedral element configuration
  // might change after displacement and Delaunay
  this->ComputeElementLabels();

  // Create interpolator for the brain distance image so we can get gradients
  typedef itk::BSplineInterpolateImageFunction<FloatImageType, double>
    InterpolatorType;

  InterpolatorType::Pointer brainDistInterp = InterpolatorType::New();
  brainDistInterp->SetInputImage(m_BrainDistanceImage);
  brainDistInterp->SetSplineOrder(3);

  DynArray<MatrixType> inverseK_iiList;
  inverseK_iiList.Allocate(numPoints);

  for (unsigned int i = 0; i < numPoints; i++)
  {
    vtkSmartPointer<vtkIdList> cellIds = vtkSmartPointer<vtkIdList>::New();

    m_CurrentMesh->GetVTKMesh()->GetPointCells(i, cellIds);

    MatrixType K_ii(3, 3, 0.0);

    vtkSmartPointer<vtkIdList> elPtIds = vtkSmartPointer<vtkIdList>::New();

    for (unsigned int j = 0; j < cellIds->GetNumberOfIds(); j++)
    {
      unsigned int el = cellIds->GetId(j);

      MatrixType delN = m_CurrentMesh->ComputeShapeFunctionDerivatives(el);

      m_CurrentMesh->GetVTKMesh()->GetCellPoints(el, elPtIds);

      if (elPtIds->GetNumberOfIds() != 4)
        continue;

      unsigned int local_i = 0;
      for (local_i = 0; local_i < elPtIds->GetNumberOfIds(); local_i++)
      {
        if (elPtIds->GetId(local_i) == i)
          break;
      }

      if (local_i >= elPtIds->GetNumberOfIds())
        continue;

      MatrixType B_i(6, 3, 0.0);

      B_i(0, 0) = delN(local_i, 0);
      B_i(1, 1) = delN(local_i, 1);
      B_i(2, 2) = delN(local_i, 2);

      B_i(3, 0) = delN(local_i, 1);
      B_i(3, 1) = delN(local_i, 0);

      B_i(4, 0) = delN(local_i, 2);
      B_i(4, 2) = delN(local_i, 0);

      B_i(5, 1) = delN(local_i, 2);
      B_i(5, 2) = delN(local_i, 1);

      MatrixType D = m_BrainElasticityMatrix;

      if (m_ElementLabels[el] == 3)
        continue; // Assume vent has zero Young mod and zero Poisson ratio
      if (m_ElementLabels[el] == 4)
        D = m_FalxElasticityMatrix;
      if (m_ElementLabels[el] == 9)
        D = m_DuraElasticityMatrix;
      if (m_ElementLabels[el] == 5)
        continue; // Assume internal tumor has no resistance

      MatrixType K_ii_el = B_i.transpose() * D * B_i;

      K_ii += K_ii_el * m_CurrentMesh->ComputeElementVolume(el);
    } // for j


#if SOLVER_AUG_BC
    // Augment Kii with [n^t 1] row, if sliding BC
    if (m_NodeBCMarkers[i] == SlidingBC)
    {
      // Compute normal at node point
      double x[3];
      m_CurrentMesh->GetVTKMesh()->GetPoint(i, x);

      FloatImagePointType p;
      p[0] = x[0];
      p[1] = x[1];
      p[2] = x[2];

      if (!brainDistInterp->IsInsideBuffer(p))
        continue;

      // Obtain gradient
      InterpolatorType::CovariantVectorType gradPhi =
        brainDistInterp->EvaluateDerivative(p);

      // Normalize gradient
      float norm = gradPhi.GetNorm();

      if (norm >= 1e-10)
        gradPhi /= norm;

// Kp = [K; n' 1]
// fp = [f 0]
// if n = 0, ignored automatically

      MatrixType augK_ii(4, 4, 0.0);
      for (int r = 0; r < 3; r++)
        for (int c = 0; c < 3; c++)
          augK_ii(r, c) = K_ii(r, c);

      for (int c = 0; c < 3; c++)
        augK_ii(3, c) = gradPhi[c];
      augK_ii(3, 3) = 1.0;

      K_ii = augK_ii;
    }
#endif

    // Check determinant
    float det = vnl_determinant(K_ii);

    if (det < 1e-10)
    {
      // In case of bad Kii elasticity matrix, generate zero displacements
      // i.e. inverse of Kii should be near zero
      MatrixType badK = K_ii; // Same size as K_ii
      badK.set_identity();
      badK *= 1e-10;
      //badK.fill(0.0);
      inverseK_iiList.Append(badK);
    }
    else
    {
      inverseK_iiList.Append(MatrixInverseType(K_ii));
    }

  } // for i

  // Compute iterative solution
  for (unsigned int iter = 1; iter <= m_DeformationSolverIterations; iter++)
  {

    // Preparation: u+ = f
    MatrixType nextSolutions(numPoints, 3);
    for (unsigned int k = 0; k < numPoints; k++)
      nextSolutions.set_row(k, m_CachedForceMatrix.get_row(k));

    // Next step: u+ -= K_ij * u_j
    // Jacobi: use u_j from previous step
    VectorType vec(numPoints*3, 0.0);
    for (unsigned int i = 0; i < numPoints; i++)
    {
      for (unsigned int d = 0; d < 3; d++)
        vec[i*3+d] = m_NodeSolutions(i, d);
    }
    vec = this->ProductKuNoDiagonal(vec);
    for (unsigned int i = 0; i < numPoints; i++)
    {
      for (unsigned int d = 0; d < 3; d++)
        nextSolutions(i, d) -= vec[i*3 + d];
    }

    // u+ = Kii^-1 * (f - sum_j{Kij*u_j})
    for (unsigned int i = 0; i < numPoints; i++)
    {
#if SOLVER_AUG_BC
      if (m_NodeBCMarkers[i] == FixedBC)
      {
        for (int d = 0; d < 3; d++)
          nextSolutions(i, d) = 0;
        continue;
      }

      // Use augmented matrix and vector in case of sliding BC
      if (m_NodeBCMarkers[i] == SlidingBC)
      {
// Kp = [K; n' 0]
// fp = [f 0]
// if n = 0, ignored automatically

        VectorType aug_nextSol(4, 0.0);
        for (int dim = 0; dim < 3; dim++)
          aug_nextSol[dim] = nextSolutions(i, dim);

        VectorType aug_unext_i =
          inverseK_iiList[i] * aug_nextSol;

        //float dotmag = aug_unext_i[3];

        for (int dim = 0; dim < 3; dim++)
          nextSolutions(i, dim) =  aug_unext_i[dim];
          // DEBUG
          //nextSolutions(i, dim) =  aug_unext_i[dim] - dotmag * n[dim];

        continue;
      } // if sliding bc
#endif

      VectorType unext_i =
        inverseK_iiList[i] * nextSolutions.get_row(i);

      nextSolutions.set_row(i, unext_i);

    } // for i

    // Compute difference between iterations
    float maxSolutionDiff = 0;
    float sumSolutionDiff = 0;
    for (unsigned int k = 0; k < numPoints; k++)
    {
      VectorType udiff = m_NodeSolutions.get_row(k) - nextSolutions.get_row(k);

      float diff = udiff.magnitude();
      if (diff > maxSolutionDiff)
        maxSolutionDiff = diff;

      sumSolutionDiff += diff;
    }

/*
    if ((iter % 10) == 1)
    {
      std::cout << "Solver iter " << iter << std::endl;
      std::cout << "Max sol diff = " << maxSolutionDiff 
        << ", sum = " << sumSolutionDiff << std::endl;
    }
*/

    // Assign the new solution
#if SOLVER_USE_AVERAGING
    if (iter > 1)
    {
      for (unsigned int k = 0; k < numPoints; k++)
      {
        VectorType u =
          (m_NodeSolutions.get_row(k) * 0.5) + (nextSolutions.get_row(k) * 0.5);
        m_NodeSolutions.set_row(k, u);
      }
    }
    else
    {
      m_NodeSolutions = nextSolutions;
    }
#else
    m_NodeSolutions = nextSolutions;
#endif

    // Convergence?
    if (maxSolutionDiff < 1e-5)
    {
//std::cout << "Assume convergence at iter " << iter << " with max diff = " << maxSolutionDiff << std::endl;
      break;
    }

  } // for iter

#if !SOLVER_AUG_BC
  // Apply BC
  for (unsigned int k = 0; k < numPoints; k++)
  {
    if (m_NodeBCMarkers[k] == FixedBC)
    {
      for (int d = 0; d < 3; d++)
         m_NodeSolutions(k, d) = 0;
      continue;
    }

    if (m_NodeBCMarkers[k] == SlidingBC)
    {
      VectorType u_k = m_NodeSolutions.get_row(k);

      // Compute normal at node point
      double x[3];
      m_CurrentMesh->GetVTKMesh()->GetPoint(k, x);

      FloatImagePointType p;
      p[0] = x[0];
      p[1] = x[1];
      p[2] = x[2];

      if (!brainDistInterp->IsInsideBuffer(p))
        continue;

      // Obtain gradient
      InterpolatorType::CovariantVectorType gradPhi =
        brainDistInterp->EvaluateDerivative(p);

      // Normalize gradient
      float norm = gradPhi.GetNorm();
      if (norm < 1e-20)
      {
        // Zero displacement solution if normal is undefined
        for (int dim = 0; dim < 3; dim++)
          m_NodeSolutions(k, dim) = 0.0;
        continue;
      }
      gradPhi /= norm;

      // Subtract the component along normal direction
      float dotProduct = 0;
      for (int dim = 0; dim < 3; dim++)
        dotProduct += u_k[dim] * gradPhi[dim];
      for (int dim = 0; dim < 3; dim++)
        u_k[dim] -= dotProduct * gradPhi[dim];

      m_NodeSolutions.set_row(k, u_k);
    }
  } // BC loop
#endif

  // Clamp displacement magnitudes
  for (unsigned int k = 0; k < numPoints; k++)
  {
    VectorType u_k = m_NodeSolutions.get_row(k);

    for (int dim = 0; dim < 3; dim++)
    {
      if (vnl_math_isnan(u_k[dim]))
        u_k[dim] = 0.0;
      if (vnl_math_isinf(u_k[dim]))
        u_k[dim] = 0.0;
    }

    float umag = u_k.magnitude();
    if (umag > MAX_DISP)
    {
      u_k *= (MAX_DISP / umag);
    }

    m_NodeSolutions.set_row(k, u_k);
  }

  // Smoothen the Jacobi solution
  MatrixType smoothSol(numPoints, 3, 0.0);
  VectorType smoothWeights(numPoints, 1e-10);

  for (unsigned int el = 0; el < numElements; el++)
  {
    vtkSmartPointer<vtkIdList> ptIds = vtkSmartPointer<vtkIdList>::New();

    m_CurrentMesh->GetVTKMesh()->GetCellPoints(el, ptIds);

    if (ptIds->GetNumberOfIds() != 4)
      continue;

    float vol = m_CurrentMesh->ComputeElementVolume(el);

    VectorType u_ave(3, 0.0);

    for (unsigned int i = 0; i < 4; i++)
    {
      unsigned int global_i = ptIds->GetId(i);
      u_ave += m_NodeSolutions.get_row(global_i);
    }

    u_ave /= 4.0;
    u_ave *= vol;

    for (unsigned int i = 0; i < 4; i++)
    {
      unsigned int global_i = ptIds->GetId(i);
      smoothSol.set_row(global_i,
        smoothSol.get_row(global_i) + u_ave);
      smoothWeights[global_i] += vol;
    }
  } 

  for (unsigned int i = 0; i < numPoints; i++)
    m_NodeSolutions.set_row(i, smoothSol.get_row(i) / smoothWeights[i]);

}

void
TumorMassEffectGenerator
::ComputeElementLabels()
{
  // Assigns to every element of the current mesh the label that occurs most
  // often among its nodes (majority vote over m_NodeLabels). Ties are resolved
  // in favor of the smallest label value. The result is stored in
  // m_ElementLabels, which is re-initialized to the current element count.

  const unsigned int numElements = m_CurrentMesh->GetNumberOfElements();

  m_ElementLabels.Initialize(numElements, 0);

  // Histogram covers label values [0, numLabelBins)
  const unsigned int numLabelBins = 10;
  std::vector<unsigned int> countLabels(numLabelBins, 0);

  // The id list is refilled by GetCellPoints for each element, so a single
  // instance can be reused
  vtkSmartPointer<vtkIdList> ptIds = vtkSmartPointer<vtkIdList>::New();

  for (unsigned int el = 0; el < numElements; el++)
  {
    m_CurrentMesh->GetVTKMesh()->GetCellPoints(el, ptIds);

    // Reset the histogram for this element
    countLabels.assign(numLabelBins, 0);

    for (unsigned int k = 0; k < ptIds->GetNumberOfIds(); k++)
    {
      const unsigned char label = m_NodeLabels[ptIds->GetId(k)];

      // Node labels outside the histogram range would index past the end of
      // countLabels; leave them out of the vote instead
      if (label >= numLabelBins)
        continue;

      countLabels[label]++;
    }

    // Pick the most frequent label; strict '>' keeps the smallest label on ties
    unsigned int maxLabel = 0;
    unsigned int maxCount = countLabels[0];
    for (unsigned int k = 1; k < countLabels.size(); k++)
    {
      if (countLabels[k] > maxCount)
      {
        maxCount = countLabels[k];
        maxLabel = k;
      }
    }

    m_ElementLabels[el] = maxLabel;
  }

}

void
TumorMassEffectGenerator
::DetermineSingularPoints()
{
  unsigned int numPoints = m_CurrentMesh->GetNumberOfPoints();
  unsigned int numElements = m_CurrentMesh->GetNumberOfElements();

  m_SingularPoints.Initialize(numPoints, 0);

  for (unsigned int i = 0; i < numPoints; i++)
  {
    vtkSmartPointer<vtkIdList> cellIds = vtkSmartPointer<vtkIdList>::New();

    m_CurrentMesh->GetVTKMesh()->GetPointCells(i, cellIds);

    MatrixType K_ii(3, 3, 0.0);

    vtkSmartPointer<vtkIdList> elPtIds = vtkSmartPointer<vtkIdList>::New();

    for (unsigned int j = 0; j < cellIds->GetNumberOfIds(); j++)
    {
      unsigned int el = cellIds->GetId(j);

      MatrixType delN = m_CurrentMesh->ComputeShapeFunctionDerivatives(el);

      m_CurrentMesh->GetVTKMesh()->GetCellPoints(el, elPtIds);

      if (elPtIds->GetNumberOfIds() != 4)
        continue;

      unsigned int local_i = 0;
      for (local_i = 0; local_i < elPtIds->GetNumberOfIds(); local_i++)
      {
        if (elPtIds->GetId(local_i) == i)
          break;
      }

      if (local_i >= elPtIds->GetNumberOfIds())
        continue;

      MatrixType B_i(6, 3, 0.0);

      B_i(0, 0) = delN(local_i, 0);
      B_i(1, 1) = delN(local_i, 1);
      B_i(2, 2) = delN(local_i, 2);

      B_i(3, 0) = delN(local_i, 1);
      B_i(3, 1) = delN(local_i, 0);

      B_i(4, 0) = delN(local_i, 2);
      B_i(4, 2) = delN(local_i, 0);

      B_i(5, 1) = delN(local_i, 2);
      B_i(5, 2) = delN(local_i, 1);

      MatrixType D = m_BrainElasticityMatrix;

      if (m_ElementLabels[el] == 3)
        continue; // Assume vent has zero Young mod and zero Poisson ratio
      if (m_ElementLabels[el] == 4)
        D = m_FalxElasticityMatrix;
      if (m_ElementLabels[el] == 9)
        D = m_DuraElasticityMatrix;
      if (m_ElementLabels[el] == 5)
        continue; // Assume internal tumor has no resistance

      MatrixType K_ii_el = B_i.transpose() * D * B_i;

      K_ii += K_ii_el * m_CurrentMesh->ComputeElementVolume(el);
    } // for j

    // Check determinant
    float det = vnl_determinant(K_ii);

    if (det < 1e-2)
      m_SingularPoints[i] = 1;
    else
      m_SingularPoints[i] = 0;

  } // for i

}

TumorMassEffectGenerator::VectorType
TumorMassEffectGenerator
::Precondition(const VectorType& u)
{
  // Block Jacobi preconditioning: every 3-component node segment of u is
  // multiplied by the inverse of that node's 3x3 diagonal stiffness block.
  // Nodes whose block determinant is below 1e-20 are left at zero in the
  // output. Throws std::runtime_error if u does not hold 3 values per node.

  const unsigned int nodeCount = m_CurrentMesh->GetNumberOfPoints();

  if (u.size() != (nodeCount*3))
    throw std::runtime_error(
      "[TumorMassEffectGenerator::Precondition] Invalid input vector size");

  VectorType result(nodeCount*3, 0.0);

  for (unsigned int node = 0; node < nodeCount; ++node)
  {
    // Elements that share this node
    vtkSmartPointer<vtkIdList> incidentCells =
      vtkSmartPointer<vtkIdList>::New();
    m_CurrentMesh->GetVTKMesh()->GetPointCells(node, incidentCells);

    // Scratch list for the node ids of one element
    vtkSmartPointer<vtkIdList> cellNodes = vtkSmartPointer<vtkIdList>::New();

    MatrixType diagBlock(3, 3, 0.0);

    for (unsigned int c = 0; c < incidentCells->GetNumberOfIds(); ++c)
    {
      const unsigned int cell = incidentCells->GetId(c);

      MatrixType dN = m_CurrentMesh->ComputeShapeFunctionDerivatives(cell);

      m_CurrentMesh->GetVTKMesh()->GetCellPoints(cell, cellNodes);

      // Only 4-node (tetrahedral) elements contribute
      if (cellNodes->GetNumberOfIds() != 4)
        continue;

      // Find the position of this node inside the element
      unsigned int slot = 0;
      while (slot < cellNodes->GetNumberOfIds() &&
             cellNodes->GetId(slot) != node)
        ++slot;

      if (slot >= cellNodes->GetNumberOfIds())
        continue;

      // Select the elasticity matrix from the element label
      const MatrixType* elasticity = &m_BrainElasticityMatrix;
      switch (m_ElementLabels[cell])
      {
        case 3: // Assume vent has zero Young mod and zero Poisson ratio
        case 5: // Assume internal tumor has no resistance
          continue;
        case 4:
          elasticity = &m_FalxElasticityMatrix;
          break;
        case 9:
          elasticity = &m_DuraElasticityMatrix;
          break;
        default:
          break;
      }

      // Strain-displacement matrix of the node within this element
      MatrixType strainDisp(6, 3, 0.0);

      for (unsigned int axis = 0; axis < 3; ++axis)
        strainDisp(axis, axis) = dN(slot, axis);

      strainDisp(3, 0) = dN(slot, 1);
      strainDisp(3, 1) = dN(slot, 0);

      strainDisp(4, 0) = dN(slot, 2);
      strainDisp(4, 2) = dN(slot, 0);

      strainDisp(5, 1) = dN(slot, 2);
      strainDisp(5, 2) = dN(slot, 1);

      MatrixType cellBlock = strainDisp.transpose() * (*elasticity) * strainDisp;

      diagBlock += cellBlock * m_CurrentMesh->ComputeElementVolume(cell);
    } // for c

    // Leave the output at zero for nodes with a (near) singular block
    float det = vnl_determinant(diagBlock);

    if (det < 1e-20)
      continue;

    // Segment of the input vector that belongs to this node
    VectorType segment(3);
    for (unsigned int d = 0; d < 3; ++d)
      segment[d] = u[node*3 + d];

    // Explicit inverse of the block applied to the segment
    MatrixType inverseBlock = MatrixInverseType(diagBlock);
    segment = inverseBlock * segment;

    for (unsigned int d = 0; d < 3; ++d)
      result[node*3 + d] += segment[d];

  } // for node

  return result;

}

ITK_THREAD_RETURN_TYPE
TumorMassEffectGenerator
::_productKuNoDiagonalThread(void* arg)
{
  // Worker routine for ProductKuNoDiagonal(). 'arg' is the
  // itk::MultiThreader::ThreadInfoStruct whose UserData points at the owning
  // TumorMassEffectGenerator.
  //
  // Each worker repeatedly fetches an element index from GetNextElement()
  // until the index reaches the element count. For every 4-node element it
  // adds K_ij * u_j (u taken from _MT_u) into row block i of _MT_Ku for all
  // node pairs with i != j, i.e. the node self-coupling terms are left out.
  //
  // Element-invariant work (material selection, strain-displacement matrices)
  // is done once per element, and the element's contributions are summed
  // locally so that the shared mutex is taken only once per element for the
  // update of _MT_Ku. Compared with adding every pair separately this only
  // changes the grouping of the floating point additions.

  typedef itk::MultiThreader::ThreadInfoStruct  ThreadInfoType;
  ThreadInfoType * infoStruct = static_cast<ThreadInfoType*>( arg );
  TumorMassEffectGenerator* obj = static_cast<TumorMassEffectGenerator*>(
    infoStruct->UserData);

  const unsigned int numElements = obj->m_CurrentMesh->GetNumberOfElements();

  // Per-thread id list, refilled by GetCellPoints for each element
  vtkSmartPointer<vtkIdList> ptIds = vtkSmartPointer<vtkIdList>::New();

  // Update values
  while (true)
  {
    const unsigned int el = obj->GetNextElement();

    if (el >= numElements)
      break;

    // Mesh query is guarded by the mutex, as in the rest of this class
    (obj->m_Mutex).Lock();
    obj->m_CurrentMesh->GetVTKMesh()->GetCellPoints(el, ptIds);
    (obj->m_Mutex).Unlock();

    const unsigned int numIds = ptIds->GetNumberOfIds();

    if (numIds != 4)
      continue;

    // Material selection depends only on the element
    const unsigned int label = obj->m_ElementLabels[el];

    if (label == 3)
      continue; // Assume vent has zero Young mod and zero Poisson ratio
    if (label == 5)
      continue; // Assume internal tumor has no resistance

    const MatrixType* D = &(obj->m_BrainElasticityMatrix);
    if (label == 4)
      D = &(obj->m_FalxElasticityMatrix);
    if (label == 9)
      D = &(obj->m_DuraElasticityMatrix);

    MatrixType delN = obj->m_CurrentMesh->ComputeShapeFunctionDerivatives(el);

    float vol = obj->m_CurrentMesh->ComputeElementVolume(el);

    // Strain-displacement matrix and global id of each of the four nodes
    MatrixType B[4];
    unsigned int globalIds[4];

    for (unsigned int a = 0; a < 4; a++)
    {
      globalIds[a] = ptIds->GetId(a);

      B[a].set_size(6, 3);
      B[a].fill(0.0);

      B[a](0, 0) = delN(a, 0);
      B[a](1, 1) = delN(a, 1);
      B[a](2, 2) = delN(a, 2);

      B[a](3, 0) = delN(a, 1);
      B[a](3, 1) = delN(a, 0);

      B[a](4, 0) = delN(a, 2);
      B[a](4, 2) = delN(a, 0);

      B[a](5, 1) = delN(a, 2);
      B[a](5, 2) = delN(a, 1);
    }

    // Contributions of this element to the four node rows of K*u
    MatrixType Ku_el(4, 3, 0.0);

    for (unsigned int i = 0; i < 4; i++)
    {
      // B_i^T * D is shared by all j
      MatrixType BtD_i = B[i].transpose() * (*D);

      for (unsigned int j = 0; j < 4; j++)
      {
        // Skip self-coupling, including repeated node ids within an element
        if (i == j)
          continue;

        if (globalIds[i] == globalIds[j])
          continue;

        MatrixType Kij_el = BtD_i * B[j];
        Kij_el *= vol;

        VectorType u_j(3, 0.0);
        for (unsigned int d = 0; d < 3; d++)
          u_j[d] = obj->_MT_u[globalIds[j]*3 + d];

        VectorType w_el = Kij_el * u_j;

        for (unsigned int d = 0; d < 3; d++)
          Ku_el(i, d) += w_el[d];

      } // for j
    } // for i

    // Single guarded update of the shared result vector
    (obj->m_Mutex).Lock();
    for (unsigned int i = 0; i < 4; i++)
      for (unsigned int d = 0; d < 3; d++)
        obj->_MT_Ku[globalIds[i]*3 + d] += Ku_el(i, d);
    (obj->m_Mutex).Unlock();

  } // for K*u

  return ITK_THREAD_RETURN_VALUE;
}

TumorMassEffectGenerator::VectorType
TumorMassEffectGenerator
::ProductKuNoDiagonal(const VectorType& u)
{
  // Evaluates the stiffness product for u with the element loop spread over
  // worker threads running _productKuNoDiagonalThread. u must hold three
  // values per mesh node, otherwise std::runtime_error is thrown. An input
  // with magnitude below 1e-10 yields a zero vector without starting threads.

  const unsigned int vectorLength = m_CurrentMesh->GetNumberOfPoints() * 3;

  if (u.size() != vectorLength)
    throw std::runtime_error(
      "[TumorMassEffectGenerator::ProductKuNoDiagonal] Invalid vector input");

  if (u.magnitude() < 1e-10)
    return VectorType(vectorLength, 0.0);

  // Input and output shared with the worker threads
  this->_MT_u = u;
  this->_MT_Ku = VectorType(vectorLength, 0.0);

  itk::MultiThreader::Pointer threader = itk::MultiThreader::New();

  // Thread count: the configured value, or 3/4 (integer arithmetic) of the
  // ITK global default when the configured value is 0; never fewer than 2
  int threadCount = this->GetNumberOfThreads();
  if (threadCount == 0)
    threadCount = threader->GetGlobalDefaultNumberOfThreads() / 4 * 3;
  threadCount = (threadCount < 2) ? 2 : threadCount;

  // Workers start pulling element indices from the beginning
  this->ResetElementCounter();

  threader->SetNumberOfThreads(threadCount);
  threader->SetSingleMethod(
    &TumorMassEffectGenerator::_productKuNoDiagonalThread,
    static_cast<void*>(this));
  threader->SingleMethodExecute();

  return this->_MT_Ku;
}

ITK_THREAD_RETURN_TYPE
TumorMassEffectGenerator
::_productKuThread(void* arg)
{
  // Worker routine for ProductKu(). 'arg' is the
  // itk::MultiThreader::ThreadInfoStruct whose UserData points at the owning
  // TumorMassEffectGenerator.
  //
  // Each worker repeatedly fetches an element index from GetNextElement()
  // until the index reaches the element count. For every 4-node element it
  // adds K_ij * u_j (u taken from _MT_u) into row block i of _MT_Ku for all
  // node pairs (i, j), including i == j.
  //
  // Element-invariant work (material selection, strain-displacement matrices)
  // is done once per element, and the element's contributions are summed
  // locally so that the shared mutex is taken only once per element for the
  // update of _MT_Ku. Compared with adding every pair separately this only
  // changes the grouping of the floating point additions.

  typedef itk::MultiThreader::ThreadInfoStruct  ThreadInfoType;
  ThreadInfoType * infoStruct = static_cast<ThreadInfoType*>( arg );
  TumorMassEffectGenerator* obj = static_cast<TumorMassEffectGenerator*>(
    infoStruct->UserData);

  const unsigned int numElements = obj->m_CurrentMesh->GetNumberOfElements();

  // Per-thread id list, refilled by GetCellPoints for each element
  vtkSmartPointer<vtkIdList> ptIds = vtkSmartPointer<vtkIdList>::New();

  // Update values
  while (true)
  {
    const unsigned int el = obj->GetNextElement();

    if (el >= numElements)
      break;

    // Mesh query is guarded by the mutex, as in the rest of this class
    (obj->m_Mutex).Lock();
    obj->m_CurrentMesh->GetVTKMesh()->GetCellPoints(el, ptIds);
    (obj->m_Mutex).Unlock();

    const unsigned int numIds = ptIds->GetNumberOfIds();

    if (numIds != 4)
      continue;

    // Material selection depends only on the element
    const unsigned int label = obj->m_ElementLabels[el];

    if (label == 3)
      continue; // Assume vent has zero Young mod and zero Poisson ratio
    if (label == 5)
      continue; // Assume internal tumor has no resistance

    const MatrixType* D = &(obj->m_BrainElasticityMatrix);
    if (label == 4)
      D = &(obj->m_FalxElasticityMatrix);
    if (label == 9)
      D = &(obj->m_DuraElasticityMatrix);

    MatrixType delN = obj->m_CurrentMesh->ComputeShapeFunctionDerivatives(el);

    float vol = obj->m_CurrentMesh->ComputeElementVolume(el);

    // Strain-displacement matrix and global id of each of the four nodes
    MatrixType B[4];
    unsigned int globalIds[4];

    for (unsigned int a = 0; a < 4; a++)
    {
      globalIds[a] = ptIds->GetId(a);

      B[a].set_size(6, 3);
      B[a].fill(0.0);

      B[a](0, 0) = delN(a, 0);
      B[a](1, 1) = delN(a, 1);
      B[a](2, 2) = delN(a, 2);

      B[a](3, 0) = delN(a, 1);
      B[a](3, 1) = delN(a, 0);

      B[a](4, 0) = delN(a, 2);
      B[a](4, 2) = delN(a, 0);

      B[a](5, 1) = delN(a, 2);
      B[a](5, 2) = delN(a, 1);
    }

    // Contributions of this element to the four node rows of K*u
    MatrixType Ku_el(4, 3, 0.0);

    for (unsigned int i = 0; i < 4; i++)
    {
      // B_i^T * D is shared by all j
      MatrixType BtD_i = B[i].transpose() * (*D);

      for (unsigned int j = 0; j < 4; j++)
      {
        MatrixType Kij_el = BtD_i * B[j];
        Kij_el *= vol;

        VectorType u_j(3, 0.0);
        for (unsigned int d = 0; d < 3; d++)
          u_j[d] = obj->_MT_u[globalIds[j]*3 + d];

        VectorType w_el = Kij_el * u_j;

        for (unsigned int d = 0; d < 3; d++)
          Ku_el(i, d) += w_el[d];

      } // for j
    } // for i

    // Single guarded update of the shared result vector
    (obj->m_Mutex).Lock();
    for (unsigned int i = 0; i < 4; i++)
      for (unsigned int d = 0; d < 3; d++)
        obj->_MT_Ku[globalIds[i]*3 + d] += Ku_el(i, d);
    (obj->m_Mutex).Unlock();

  } // for K*u

  return ITK_THREAD_RETURN_VALUE;
}

TumorMassEffectGenerator::VectorType
TumorMassEffectGenerator
::ProductKu(const VectorType& u)
{
  // Evaluates the stiffness product K*u with the element loop spread over
  // worker threads running _productKuThread. u must hold three values per
  // mesh node, otherwise std::runtime_error is thrown. An input with
  // magnitude below 1e-10 yields a zero vector without starting threads.

  const unsigned int vectorLength = m_CurrentMesh->GetNumberOfPoints() * 3;

  if (u.size() != vectorLength)
    throw std::runtime_error(
      "[TumorMassEffectGenerator::ProductKu] Invalid vector input");

  if (u.magnitude() < 1e-10)
    return VectorType(vectorLength, 0.0);

  // Input and output shared with the worker threads
  this->_MT_u = u;
  this->_MT_Ku = VectorType(vectorLength, 0.0);

  itk::MultiThreader::Pointer threader = itk::MultiThreader::New();

  // Thread count: the configured value, or 3/4 (integer arithmetic) of the
  // ITK global default when the configured value is 0; never fewer than 2
  int threadCount = this->GetNumberOfThreads();
  if (threadCount == 0)
    threadCount = threader->GetGlobalDefaultNumberOfThreads() / 4 * 3;
  threadCount = (threadCount < 2) ? 2 : threadCount;

  // Workers start pulling element indices from the beginning
  this->ResetElementCounter();

  threader->SetNumberOfThreads(threadCount);
  threader->SetSingleMethod(
    &TumorMassEffectGenerator::_productKuThread, static_cast<void*>(this));
  threader->SingleMethodExecute();

  return this->_MT_Ku;

}

TumorMassEffectGenerator::VectorType
TumorMassEffectGenerator
::GMRES(const VectorType& init, const VectorType& f, unsigned int maxiters)
{
  // Runs at most 'maxiters' GMRES steps (no preconditioning) on K*u = f,
  // starting from 'init', and returns the updated solution vector.
  //
  //   init      initial guess; treated as zero when its magnitude is < 1e-10
  //   f         right-hand side (flattened nodal forces, 3 values per node)
  //   maxiters  maximum number of Krylov basis vectors to build
  //
  // Iteration stops early once the residual norm estimate drops to
  // 1e-4 * |f| or below. 'init' is returned unchanged when no iteration can
  // be performed (initial residual already within tolerance, or maxiters == 0).
  //
  // H and V use 1-based indexing; row/column 0 of each is unused.

  unsigned int numPoints = m_CurrentMesh->GetNumberOfPoints();

  VectorType r;
  if (init.magnitude() >= 1e-10)
    r = f - this->ProductKu(init);
  else
    r = f;

  // TODO: preconditioning?
  //r = this->Precondition(r);

  float rho = r.magnitude();

  //float errTol = numPoints * 3 * 1e-2;
  float errTol = f.magnitude() * 1e-4;

  // Nothing to iterate on: bail out before normalizing r, which would divide
  // by zero for a zero residual, and before forming the final update from an
  // empty Krylov basis
  if (!(rho > errTol) || (maxiters == 0))
    return init;

  MatrixType H(maxiters+2, maxiters+2, 0.0);

  MatrixType V(numPoints*3, maxiters+2, 0.0);

  V.set_column(1, r / rho);

  float beta = rho;

  VectorType y;

  unsigned int k = 0;

  while ((rho > errTol) && (k < maxiters))
  {
    ++k;

    // No precond
    VectorType w = this->ProductKu(V.get_column(k));

    // Left precond
    //VectorType w = this->ProductKu(V.get_column(k));
    //w = this->Precondition(w);

    // Orthogonalize the new vector against the existing basis, one basis
    // vector at a time
    for (unsigned int j = 1; j <= k; j++)
    {
      H(j, k) = dot_product(w, V.get_column(j));
      w = w - V.get_column(j)*H(j, k);
    }
    H(k+1, k) = w.magnitude();

    // A zero norm means the basis cannot be extended; keep the zero vector
    if (H(k+1, k) != 0.0)
      w = w/H(k+1, k);

    V.set_column(k+1, w);

    // Least squares problem on the (k+1) x k block of H
    MatrixType Hk = H.extract(k+1, k, 1, 1);

    VectorType be(k+1, 0.0);
    be[0] = beta;

    MatrixQRType qr(Hk);
    y = qr.solve(be);
    //MatrixSVDType svd(Hk);
    //y = svd.solve(be);

    VectorType errvec = be - Hk*y;

    rho = errvec.magnitude();
//std::cout << "Error iter " << k << " = " << rho << std::endl;
  }

  // Combine the first k basis vectors (columns 1..k of V) with coefficients y
  return init + V.extract(numPoints*3, k, 0, 1) * y;
}

void
TumorMassEffectGenerator
::RefineSolutionsGMRES()
{

  // Preparation
  // Recompute element labeling, since tetrahedral element configuration
  // might change after displacement and Delaunay
  this->ComputeElementLabels();

  //this->DetermineSingularPoints();

  unsigned int numPoints = m_CurrentMesh->GetNumberOfPoints();
  unsigned int numElements = m_CurrentMesh->GetNumberOfElements();

  // Flatten the force input
  VectorType f(numPoints*3, 0.0);

  for (unsigned int i = 0; i < numPoints; i++)
  {
    for (unsigned int d = 0; d < 3; d++)
      f[i*3 + d] = m_CachedForceMatrix(i, d);
  }

//std::cout << "f norm = " << f.magnitude() << std::endl;

  // Output vector, initially zero
  VectorType u(numPoints*3, 0.0);

  // Use result from previous application of block Jacobi iterations
  for (unsigned int i = 0; i < numPoints; i++)
  {
    for (unsigned int d = 0; d < 3; d++)
      u[i*3+d] = m_NodeSolutions(i, d);
  }

  for (unsigned int iter = 0; iter < m_DeformationSolverIterations; iter++)
  {
    // Repeated application of GMRES to avoid using too much memory
    u = this->GMRES(u, f, 5);
  }

  f.set_size(1);

  // Package u vector as nx3 matrix
  m_NodeSolutions = MatrixType(numPoints, 3);
  for (unsigned int i = 0; i < numPoints; i++)
  {
    for (unsigned int d = 0; d < 3; d++)
      m_NodeSolutions(i, d) = u[i*3 + d];
  }

  u.set_size(1);

  // Clamp disp magnitude
  for (unsigned int k = 0; k < numPoints; k++)
  {
    VectorType u_k = m_NodeSolutions.get_row(k);

    for (int dim = 0; dim < 3; dim++)
    {
      if (vnl_math_isnan(u_k[dim]))
        u_k[dim] = 0.0;
      if (vnl_math_isinf(u_k[dim]))
        u_k[dim] = 0.0;
    }

    float umag = u_k.magnitude();
    if (umag > MAX_DISP)
    {
      u_k *= (MAX_DISP / umag);
    }

    m_NodeSolutions.set_row(k, u_k);
  }

  // Apply BC

  // Create interpolator for the brain distance image so we can get gradients
  typedef itk::BSplineInterpolateImageFunction<FloatImageType, double>
    InterpolatorType;

  InterpolatorType::Pointer brainDistInterp = InterpolatorType::New();
  brainDistInterp->SetInputImage(m_BrainDistanceImage);
  brainDistInterp->SetSplineOrder(3);

  for (unsigned int k = 0; k < numPoints; k++)
  {
    if (m_NodeBCMarkers[k] == FixedBC)
    {
      for (int d = 0; d < 3; d++)
         m_NodeSolutions(k, d) = 0;
      continue;
    }

    if (m_NodeBCMarkers[k] == SlidingBC)
    {
      VectorType u_k = m_NodeSolutions.get_row(k);

      // Compute normal at node point
      double x[3];
      m_CurrentMesh->GetVTKMesh()->GetPoint(k, x);

      FloatImagePointType p;
      p[0] = x[0];
      p[1] = x[1];
      p[2] = x[2];

      if (!brainDistInterp->IsInsideBuffer(p))
        continue;

      // Obtain gradient
      InterpolatorType::CovariantVectorType gradPhi =
        brainDistInterp->EvaluateDerivative(p);

      // Normalize gradient
      float norm = gradPhi.GetNorm();
      if (norm < 1e-20)
      {
        // Zero displacement solution if normal is undefined
        for (int dim = 0; dim < 3; dim++)
          m_NodeSolutions(k, dim) = 0.0;
        continue;
      }
      gradPhi /= norm;

      // Subtract the component along normal direction
      float dotProduct = 0;
      for (int dim = 0; dim < 3; dim++)
        dotProduct += u_k[dim] * gradPhi[dim];
      for (int dim = 0; dim < 3; dim++)
        u_k[dim] -= dotProduct * gradPhi[dim];

      m_NodeSolutions.set_row(k, u_k);
    }
  } // BC loop

}

void
TumorMassEffectGenerator
::UpdateMesh(DeformationFieldPointer mapImg)
{
  // Moves the nodes of the current mesh to the positions stored in 'mapImg'
  // and rebuilds m_CurrentMesh from the moved points via createMesh3D().
  //
  //   mapImg  vector image sampled at each node location; the sampled vector
  //           is used as the node's new position
  //
  // A node keeps its current position when it is marked FixedBC, when it lies
  // outside the buffer of mapImg, when its new position lies outside the
  // buffer of the brain distance image, or when the brain distance at the new
  // position is greater than -DISTANCE_EPS. m_NodeCompSolutions receives the
  // resulting position of every node.

  typedef itk::LinearInterpolateImageFunction<FloatImageType, double>
    InterpolatorType;
  InterpolatorType::Pointer brainDistInterp = InterpolatorType::New();
  brainDistInterp->SetInputImage(m_BrainDistanceImage);

  typedef itk::VectorLinearInterpolateImageFunction<DeformationFieldType, double>
    VectorInterpolatorType;
  VectorInterpolatorType::Pointer mapInterp = VectorInterpolatorType::New();
  mapInterp->SetInputImage(mapImg);

  vtkUnstructuredGrid* currVTKMesh = m_CurrentMesh->GetVTKMesh();

  unsigned int numPoints = m_InitialMesh->GetNumberOfPoints();

  // New point set, filled in node order
  vtkSmartPointer<vtkPoints> points = vtkSmartPointer<vtkPoints>::New();
  points->Allocate(numPoints);

  double x[3];

  for (unsigned int k = 0; k < numPoints; k++)
  {
    currVTKMesh->GetPoint(k, x);

    // Default: node stays where it is
    for (int d = 0; d < 3; d++)
      m_NodeCompSolutions(k, d) = x[d];

    if (m_NodeBCMarkers[k] == FixedBC)
    {
      points->InsertNextPoint(x);
      continue;
    }

    FloatImagePointType p;
    for (int d = 0; d < 3; d++)
      p[d] = x[d];

    if (!mapInterp->IsInsideBuffer(p))
    {
      // Ignore moves to outside of image
      points->InsertNextPoint(x);
      continue;
    }

    DisplacementType h = mapInterp->Evaluate(p);

    for (int d = 0; d < 3; d++)
      p[d] = h[d];

    if (!brainDistInterp->IsInsideBuffer(p))
    {
      // New position is outside the distance image, where the interpolator
      // must not be evaluated; treat it like a move to outside of the brain
      points->InsertNextPoint(x);
      continue;
    }

    float phi = brainDistInterp->Evaluate(p);

    if (phi > -DISTANCE_EPS)
    {
      // Ignore moves to outside of brain
      points->InsertNextPoint(x);
    }
    else
    {
      x[0] = h[0];
      x[1] = h[1];
      x[2] = h[2];
      points->InsertNextPoint(x);

      for (int d = 0; d < 3; d++)
        m_NodeCompSolutions(k, d) = h[d];
    }
  }

  // Replace the current mesh with one generated from the moved points
  delete m_CurrentMesh;
  m_CurrentMesh = new LinearTetrahedralMesh();
  m_CurrentMesh->SetVTKMesh(
    createMesh3D(points, m_BrainDistanceImage, m_UseQHull) );

}

void
TumorMassEffectGenerator
::ComputeDeformation()
{
  // Top-level driver: prepares the mesh and boundary conditions, then runs
  // m_DeformationIterations rounds of force computation, displacement
  // solution, deformation field update and mesh update. Results are left in
  // m_Deformation and m_InverseDeformation.
  //
  // Raises an exception through muExceptionMacro when the label image or the
  // initial VTK mesh has not been set. m_Pressure is modified during the
  // iterations and restored to its original value on exit, including when an
  // exception escapes from an iteration.

  // Check inputs
  if (m_LabelImage.IsNull())
    muExceptionMacro(<< "No label image specified");

  if (m_InitialVTKMesh.GetPointer() == 0)
    muExceptionMacro(<< "Initial mesh not specified");

  // NOTE: the label values themselves are not validated (e.g. requiring a
  // maximum label of 5), in case of using this for cases without tumor

  this->ComputeTumorCentroids();

  // Modify mesh to account for specified tumor boundary
  this->ModifyInitialMesh();

  delete m_CurrentMesh;
  m_CurrentMesh = new LinearTetrahedralMesh();
  m_CurrentMesh->SetVTKMesh(m_InitialMesh->GetVTKMesh());

  this->DetermineBCs();

  //if (m_TumorPressureSurfaces.GetSize() == 0)
  //  muExceptionMacro(<< "No surface with pressure BC");

  unsigned int numPoints = m_InitialMesh->GetNumberOfPoints();

  // Initialize compounded deformation solution with zeros
  m_NodeCompSolutions = MatrixType(numPoints, 3, 0.0);

  // Allocate deformation field for the current iteration
  DeformationFieldType::Pointer currDefImg = DeformationFieldType::New();
  // NOTE: CopyInformation fails for vector image
  //currDefImg->CopyInformation(m_LabelImage);
  currDefImg->SetDirection(m_LabelImage->GetDirection());
  currDefImg->SetOrigin(m_LabelImage->GetOrigin());
  currDefImg->SetSpacing(m_LabelImage->GetSpacing());
  currDefImg->SetRegions(m_LabelImage->GetLargestPossibleRegion());
  currDefImg->Allocate();

  // Accumulated deformation, same geometry as the label image
  m_Deformation = DeformationFieldType::New();
  //m_Deformation->CopyInformation(m_LabelImage);
  m_Deformation->SetDirection(m_LabelImage->GetDirection());
  m_Deformation->SetOrigin(m_LabelImage->GetOrigin());
  m_Deformation->SetSpacing(m_LabelImage->GetSpacing());
  m_Deformation->SetRegions(m_LabelImage->GetLargestPossibleRegion());
  m_Deformation->Allocate();

  // Accumulated inverse deformation, same geometry as the label image
  m_InverseDeformation = DeformationFieldType::New();
  //m_InverseDeformation->CopyInformation(m_LabelImage);
  m_InverseDeformation->SetDirection(m_LabelImage->GetDirection());
  m_InverseDeformation->SetOrigin(m_LabelImage->GetOrigin());
  m_InverseDeformation->SetSpacing(m_LabelImage->GetSpacing());
  m_InverseDeformation->SetRegions(m_LabelImage->GetLargestPossibleRegion());
  m_InverseDeformation->Allocate();

  DisplacementType zeroDisp;
  zeroDisp.Fill(0.0);

  m_Deformation->FillBuffer(zeroDisp);
  m_InverseDeformation->FillBuffer(zeroDisp);

  float origPressure = m_Pressure;

  try
  {
    // Do iterative estimation of displacements
    for (unsigned int iter = 1; iter <= m_DeformationIterations; iter++)
    {
      // Lower bound on the pressure used in an iteration
      if (m_Pressure < 5e-4)
        m_Pressure = 5e-4;

      muLogMacro(
        << "  * Deformation iteration " << iter << "\n");

      this->ComputeForces(m_CachedForceMatrix);

#if DO_JACOBI
      this->UpdateSolutionsJacobi();
#else
      m_NodeSolutions = MatrixType(numPoints, 3, 0.0);
#endif

      this->RefineSolutionsGMRES();

      // Forces are no longer needed for this iteration
      m_CachedForceMatrix.set_size(1, 1);

      this->UpdateDeformationField(currDefImg);

      this->InvertAccumulateDeformationField(
        m_Deformation, m_InverseDeformation, currDefImg);

      // Update mesh using compounded forward map
      this->UpdateMesh(m_InverseDeformation);

      // Reduce pressure as we go through multiple displacements
      m_Pressure *= m_Damping;
    }
  }
  catch (...)
  {
    // Do not leave the damped pressure behind when an iteration fails
    m_Pressure = origPressure;
    throw;
  }

  // Restore initial value if we're using pressure "schedule"
  m_Pressure = origPressure;

  // Clean up memory
  delete m_InitialMesh;
  m_InitialMesh = 0;
  m_CurrentMesh->ClearMappings(); // Keep final VTK mesh, but remove mappings

/*
  // Smooth the deformation fields
  //typedef VectorMeanImageFilter<DeformationFieldType, DeformationFieldType>
  typedef VectorBlurImageFilter<DeformationFieldType, DeformationFieldType>
    DeformationSmootherType;

  // Smoothen final accumulated deformations
  DeformationSmootherType::Pointer defsmoother = DeformationSmootherType::New();
  defsmoother->SetKernelWidth(1.0);
  defsmoother->SetInput(m_InverseDeformation);
  defsmoother->Update();
  m_InverseDeformation = defsmoother->GetOutput();

  DeformationSmootherType::Pointer defsmoother2 = DeformationSmootherType::New();
  defsmoother2->SetKernelWidth(1.0);
  //defsmoother2->SetInput(invf->GetOutput());
  defsmoother2->SetInput(m_Deformation);
  defsmoother2->Update();
  m_Deformation = defsmoother2->GetOutput();
*/
}

// Thread entry point handed to itk::MultiThreader by UpdateDeformationField().
//
// arg is the MultiThreader's ThreadInfoStruct; its UserData member is the
// TumorMassEffectGenerator that launched the threads.
//
// Each thread keeps asking obj->GetNextElement() for an element id until an
// id >= the number of elements comes back. For every element with exactly 4
// nodes it:
//   1. builds a padded, clamped index-space bounding box of the element,
//   2. evaluates the element shape functions at every voxel of that box
//      whose label is nonzero,
//   3. for voxels passing the inside test, adds the interpolated nodal
//      displacement (from obj->m_NodeSolutions) to obj->_MT_def and the sum
//      of the shape function values to obj->_MT_w.
//
// Calls into the VTK mesh and the updates of the two accumulation images are
// wrapped in obj->m_Mutex.
ITK_THREAD_RETURN_TYPE
TumorMassEffectGenerator
::_fillDeformationThread(void* arg)
{
  typedef itk::MultiThreader::ThreadInfoStruct  ThreadInfoType;
  ThreadInfoType * infoStruct = static_cast<ThreadInfoType*>( arg );
  TumorMassEffectGenerator* obj = static_cast<TumorMassEffectGenerator*>(
    infoStruct->UserData);

  unsigned int numElements = obj->m_CurrentMesh->GetNumberOfElements();

  ByteImageSizeType size =
    obj->m_LabelImage->GetLargestPossibleRegion().GetSize();

  // One id list per thread, refilled by GetCellPoints for every element
  vtkSmartPointer<vtkIdList> ptIds = vtkSmartPointer<vtkIdList>::New();

  while (true)
  {
    unsigned int el = obj->GetNextElement();

    if (el >= numElements)
      break;

    (obj->m_Mutex).Lock();
    obj->m_CurrentMesh->GetVTKMesh()->GetCellPoints(el, ptIds);
    (obj->m_Mutex).Unlock();

    // Only 4-node elements are rasterized
    if (ptIds->GetNumberOfIds() != 4)
      continue;

    // Compute bounding box of this element in index space. The box starts
    // inverted (min at the last index, max at 0) and is tightened by the
    // element vertices.
    long minext[3];
    long maxext[3];

    for (int d = 0; d < 3; d++)
    {
      minext[d] = size[d]-1;
      maxext[d] = 0;
    }

    for (unsigned int k = 0; k < 4; k++)
    {
      double x[3];

      (obj->m_Mutex).Lock();
      obj->m_CurrentMesh->GetVTKMesh()->GetPoint(ptIds->GetId(k), x);
      (obj->m_Mutex).Unlock();

      FloatImagePointType p;
      for (int d = 0; d < 3; d++)
        p[d] = x[d];

      // The return value is ignored; out-of-image indices are handled by
      // the clamping below
      FloatImageIndexType ind;
      obj->m_LabelImage->TransformPhysicalPointToIndex(p, ind);

      for (int d = 0; d < 3; d++)
      {
        if (ind[d] < minext[d])
          minext[d] = ind[d];
        if (ind[d] > maxext[d])
          maxext[d] = ind[d];
      }
    }

    // Expand bounding box slightly, then clamp it to [0, size-1]
    for (int d = 0; d < 3; d++)
    {
      minext[d] -= 5;
      maxext[d] += 5;

      if (minext[d] < 0)
        minext[d] = 0;
      if (maxext[d] >= (long)size[d])
        maxext[d] = size[d]-1;
    }

    // Get nodal displacements (always four, so no dynamic container needed)
    DisplacementType nodeDisps[4];
    for (int t = 0; t < 4; t++)
    {
      unsigned int r = ptIds->GetId(t);
      for (unsigned int c = 0; c < 3; c++)
        nodeDisps[t][c] = obj->m_NodeSolutions(r, c);
    }

    // Fill in image values within bounding box
    for (long k = minext[2]; k <= maxext[2]; k++)
      for (long j = minext[1]; j <= maxext[1]; j++)
        for (long i = minext[0]; i <= maxext[0]; i++)
        {
          FloatImageIndexType ind = {{i, j, k}};

          // Voxels with label 0 never receive a displacement
          if (obj->m_LabelImage->GetPixel(ind) == 0)
            continue;

          FloatImagePointType p;
          obj->m_LabelImage->TransformIndexToPhysicalPoint(ind, p);

          VectorType x(3);
          for (int d = 0; d < 3; d++)
            x[d] = p[d];

          VectorType N = obj->m_CurrentMesh->ComputeShapeFunctions(el, x);

          float minN = N[0];
          float sumN = N[0];
          for (int t = 1; t < 4; t++)
          {
            if (N[t] < minN)
              minN = N[t];
            sumN += N[t];
          }

          // Inside / outside test; slightly negative shape function values
          // are tolerated
          if (minN < -0.01)
            continue;

          // Interpolate the nodal displacements at this voxel
          DisplacementType sol;
          sol.Fill(0.0);
          for (int t = 0; t < 4; t++)
            sol += nodeDisps[t] * N[t];

          // Compound solution values; the accumulation images are written
          // by all threads, so the read-modify-write is done under the mutex
          (obj->m_Mutex).Lock();
          obj->_MT_def->SetPixel(ind, obj->_MT_def->GetPixel(ind) + sol);
          obj->_MT_w->SetPixel(ind, obj->_MT_w->GetPixel(ind) + sumN);
          (obj->m_Mutex).Unlock();

        } // for bbox pixels

  } // for el

  return ITK_THREAD_RETURN_VALUE;

}

// Fills defImg with a dense displacement field for the current solution.
//
// defImg is reset to zero, then _fillDeformationThread is run on several
// threads. The threads accumulate shape-function-weighted nodal
// displacements into defImg and the matching weights into a temporary float
// image (both published through _MT_def / _MT_w). Afterwards every voxel
// with a nonzero label and a weight of at least 1e-20 is divided by its
// accumulated weight; all other voxels are left as accumulated.
void
TumorMassEffectGenerator
::UpdateDeformationField(DeformationFieldPointer defImg)
{
  DisplacementType zeroDisp;
  zeroDisp.Fill(0.0);
  defImg->FillBuffer(zeroDisp);

  // Per-voxel sum of weights, on the same grid as the label image
  FloatImageType::Pointer weightImg = FloatImageType::New();
  weightImg->CopyInformation(m_LabelImage);
  weightImg->SetRegions(m_LabelImage->GetLargestPossibleRegion());
  weightImg->Allocate();
  weightImg->FillBuffer(0.0);

  // Accumulation targets used by the worker threads.
  // NOTE(review): these members still refer to defImg / weightImg after this
  // function returns -- confirm whether they should be reset once the
  // threads are done.
  _MT_def = defImg;
  _MT_w = weightImg;

  itk::MultiThreader::Pointer threader = itk::MultiThreader::New();

  // A setting of 0 selects three quarters of ITK's global default thread
  // count; in every case at least two threads are used
  int numThreads = this->GetNumberOfThreads();
  if (numThreads == 0)
    numThreads = threader->GetGlobalDefaultNumberOfThreads() / 4 * 3;
  if (numThreads < 2)
    numThreads = 2;

  this->ResetElementCounter();

  threader->SetNumberOfThreads(numThreads);
  threader->SetSingleMethod(
    &TumorMassEffectGenerator::_fillDeformationThread,
    static_cast<void*>(this));
  threader->SingleMethodExecute();

  // Divide by sum of weights
  typedef itk::ImageRegionIteratorWithIndex<FloatImageType> FloatIteratorType;
  FloatIteratorType it(weightImg, weightImg->GetLargestPossibleRegion());

  for (it.GoToBegin(); !it.IsAtEnd(); ++it)
  {
    FloatImageIndexType ind = it.GetIndex();

    if (m_LabelImage->GetPixel(ind) == 0)
      continue;

    // Skip voxels that (practically) no element contributed to
    float w = it.Get();
    if (w < 1e-20)
      continue;

    DisplacementType u = defImg->GetPixel(ind);
    u /= w;

    defImg->SetPixel(ind, u);
  }

}

// Composes two displacement fields and returns the result as a new image.
//
// A "map" image is built on the grid of inDef1 that stores, per voxel, the
// voxel's physical position plus the inDef1 displacement. The map is then
// resampled through inDef2 with itk::WarpVectorImageFilter, and the voxel
// position is subtracted again to turn the warped map back into a
// displacement. Voxels where the warper returned its edge padding value
// (any component tests as infinite) get a zero displacement instead.
TumorMassEffectGenerator::DeformationFieldPointer
TumorMassEffectGenerator
::Compose(
  DeformationFieldPointer inDef1,
  DeformationFieldPointer inDef2)
{
  // Marker the warper writes where it cannot sample the map image
  DisplacementType outsideMarker;
  outsideMarker.Fill(vnl_huge_val(1.0f));

  DisplacementType noDisp;
  noDisp.Fill(0.0);

  // Map image on the grid of inDef1 (geometry is copied field by field;
  // CopyInformation was found not to work for vector images)
  DeformationFieldPointer mapImg = DeformationFieldType::New();
  mapImg->SetDirection(inDef1->GetDirection());
  mapImg->SetOrigin(inDef1->GetOrigin());
  mapImg->SetSpacing(inDef1->GetSpacing());
  mapImg->SetRegions(inDef1->GetLargestPossibleRegion());
  mapImg->Allocate();

  typedef itk::ImageRegionIteratorWithIndex<DeformationFieldType>
    DefIteratorType;
  DefIteratorType mapIt(mapImg, mapImg->GetLargestPossibleRegion());

  // map(voxel) = position(voxel) + inDef1(voxel)
  mapIt.GoToBegin();
  while (!mapIt.IsAtEnd())
  {
    const FloatImageIndexType voxel = mapIt.GetIndex();

    FloatImagePointType pos;
    mapImg->TransformIndexToPhysicalPoint(voxel, pos);

    DisplacementType mapped = inDef1->GetPixel(voxel);
    for (int d = 0; d < 3; d++)
      mapped[d] += pos[d];

    mapIt.Set(mapped);
    ++mapIt;
  }

  // Resample the map through the second field
  typedef itk::WarpVectorImageFilter<
    DeformationFieldType, DeformationFieldType, DeformationFieldType>
    WarperType;
  WarperType::Pointer warper = WarperType::New();
  warper->SetInput(mapImg);
#if ITK_VERSION_MAJOR < 4
  warper->SetDeformationField(inDef2);
#else
  warper->SetDisplacementField(inDef2);
#endif
  warper->SetEdgePaddingValue(outsideMarker);
  warper->SetOutputDirection(mapImg->GetDirection());
  warper->SetOutputOrigin(mapImg->GetOrigin());
  warper->SetOutputSpacing(mapImg->GetSpacing());
  warper->Update();

  DeformationFieldPointer outDef = warper->GetOutput();

  // Back to displacements: warped map minus identity, visiting the voxels
  // of the map image's region
  for (mapIt.GoToBegin(); !mapIt.IsAtEnd(); ++mapIt)
  {
    const FloatImageIndexType voxel = mapIt.GetIndex();

    DisplacementType warped = outDef->GetPixel(voxel);

    const bool hitPadding =
      vnl_math_isinf(warped[0]) ||
      vnl_math_isinf(warped[1]) ||
      vnl_math_isinf(warped[2]);

    if (hitPadding)
    {
      outDef->SetPixel(voxel, noDisp);
      continue;
    }

    FloatImagePointType pos;
    mapImg->TransformIndexToPhysicalPoint(voxel, pos);

    for (int d = 0; d < 3; d++)
      warped[d] -= pos[d];

    outDef->SetPixel(voxel, warped);
  }

  return outDef;
}

// Folds the displacement of the current iteration into the two accumulated
// fields, overwriting both of them in place:
//
//   defImg    <- Compose(smoothed currDefImg, defImg)
//   invDefImg <- Compose(invDefImg, inverse of smoothed currDefImg)
//
// currDefImg is first passed through VectorBlurImageFilter (kernel width
// 1.0); the smoothed field is what gets composed and inverted. The inverse
// is computed with ITK's iterative inverse filter (100 iterations, stop
// value 1e-4).
//
// NOTE(review): the call site visible in this file passes m_Deformation as
// invDefImg and m_InverseDeformation as defImg -- confirm the intended
// forward/inverse naming.
void
TumorMassEffectGenerator
::InvertAccumulateDeformationField(
  DeformationFieldPointer invDefImg, DeformationFieldPointer defImg,
  DeformationFieldPointer currDefImg)
{
  // Smoothing the current field helps the inversion below
  typedef VectorBlurImageFilter<DeformationFieldType, DeformationFieldType>
    DeformationSmootherType;

  DeformationSmootherType::Pointer blurFilter = DeformationSmootherType::New();
  blurFilter->SetKernelWidth(1.0);
  blurFilter->SetInput(currDefImg);
  blurFilter->Update();

  DeformationFieldPointer smoothedDef = blurFilter->GetOutput();

  // New value for defImg
  DeformationFieldPointer composedDef = this->Compose(smoothedDef, defImg);

  // Invert the smoothed field
#if 1
  #if ITK_VERSION_MAJOR < 4
  typedef itk::IterativeInverseDeformationFieldImageFilter<
    DeformationFieldType, DeformationFieldType>
    InverterType;
  #else
  typedef itk::IterativeInverseDisplacementFieldImageFilter<
    DeformationFieldType, DeformationFieldType>
    InverterType;
  #endif
  InverterType::Pointer inverter = InverterType::New();
  inverter->SetNumberOfIterations(100);
  inverter->SetStopValue(1e-4);
  inverter->SetInput(smoothedDef);
  inverter->Update();
#else
  // Alternative: project-local inverter restricted to the label mask
  InverseDeformationFilter::Pointer inverter = InverseDeformationFilter::New();
  inverter->SetNumberOfIterations(100);
  inverter->SetTolerance(1e-4);
  inverter->SetMaximumDisplacementNorm(MAX_DISP);
  inverter->SetInput(smoothedDef);
  inverter->SetMaskImage(m_LabelImage);
  inverter->Update();
#endif

  DeformationFieldPointer stepInverse = inverter->GetOutput();

  // New value for invDefImg
  DeformationFieldPointer composedInvDef =
    this->Compose(invDefImg, stepInverse);

  // Copy the results voxel by voxel into the images owned by the caller
  typedef itk::ImageRegionIteratorWithIndex<DeformationFieldType>
    DefIteratorType;
  DefIteratorType dstIt(invDefImg, invDefImg->GetLargestPossibleRegion());

  dstIt.GoToBegin();
  while (!dstIt.IsAtEnd())
  {
    const FloatImageIndexType voxel = dstIt.GetIndex();

    defImg->SetPixel(voxel, composedDef->GetPixel(voxel));
    dstIt.Set(composedInvDef->GetPixel(voxel));

    ++dstIt;
  }
}
