/*=========================================================================

  Program:   Insight Segmentation & Registration Toolkit
  Module:    $RCSfile: itkHammerDeformableRegistrationImageFilter.txx,v $
  Language:  C++
  Date:      $Date: 2009/01/14 21:46:50 $
  Version:   $Revision: 1.6 $

  Copyright (c) Insight Software Consortium. All rights reserved.
  See ITKCopyright.txt or http://www.itk.org/HTML/Copyright.htm for
  details.

  This program is developed under NIH NCBC collaboration grant
  R01 EB006733, "Development and Dissemination of Robust Brain MRI
  Measurement Tools". 

  This software is distributed WITHOUT ANY WARRANTY; without even 
  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
  PURPOSE.  See the above copyright notices for more information.

  =========================================================================*/
#ifndef __itkHammerDeformableRegistrationImageFilter_txx
#define __itkHammerDeformableRegistrationImageFilter_txx
#include "itkHammerDeformableRegistrationImageFilter.h"

#include "itkImageRegionIterator.h"
#include "itkNeighborhoodAlgorithm.h"
#include "itkZeroFluxNeumannBoundaryCondition.h"
#include "itkGaussianOperator.h"
#include "itkVectorNeighborhoodOperatorImageFilter.h"
//#include "itkCannyEdgeStrengthImageFilter.h"
#include "itkResampleImageFilter.h"
#include "itkImageRegistrationMethod.h"
#include "itkMattesMutualInformationImageToImageMetric.h"
#include "itkRegularStepGradientDescentOptimizer.h"
#include "itkNearestNeighborInterpolateImageFunction.h"
#include "itkCenteredTransformInitializer.h"
#include "itkVectorResampleImageFilter.h"
#include "itkIdentityTransform.h"
#include "itkVectorLinearInterpolateImageFunction.h"
#include "itkMattesMutualInformationImageToImageMetric.h"
#include "itkImageRegistrationMethod.h"
#include "itkImageFileWriter.h"

//#define DUMP_DEFORMATIONFIELD
//#define DUMP_GMI_FEATURE
// Function-like helper macros: maximum, minimum and square.
// The arguments are substituted textually, so an argument can be evaluated
// more than once; do not pass expressions with side effects (e.g. i++).
#define HammerMax(a,b) ((a)>(b)?(a):(b))
#define HammerMin(a,b) ((a)<(b)?(a):(b))
#define HammerSQR(a) ((a)*(a))
#ifdef DUMP_DEFORMATIONFIELD
/**
 * Plain three-component float vector. Only compiled when
 * DUMP_DEFORMATIONFIELD is defined.
 */
typedef struct FVECTOR3D
{
  float x;
  float y;
  float z;
}Fvector3d;


/**
 * Release a table created by Fvector3dalloc3d().
 *
 * \param array   table to release; NULL is accepted and ignored
 * \param k_size  number of slabs (first subscript) the table was created with
 * \param i_size  number of rows per slab (second subscript)
 *
 * Slabs that are NULL are skipped, so a partially built table can be
 * released as well. Declared inline because this file is included as a
 * header and may be seen by several translation units.
 */
inline void Fvector3dfree3d(Fvector3d ***array,int k_size,int i_size)
{
  if (array == NULL)
    {
      return;
    }

  for (int k = 0; k < k_size; k++)
    {
      if (array[k] == NULL)
        {
          continue;
        }
      for (int i = 0; i < i_size; i++)
        {
          free(array[k][i]);   // free(NULL) is a no-op
        }
      free(array[k]);
    }

  free(array);
}


/**
 * Allocate a zero-initialised 3-D table addressed as array[k][i][j].
 *
 * \param i_size  extent of the second subscript
 * \param j_size  extent of the third (innermost) subscript
 * \param k_size  extent of the first subscript
 * \return the table, or NULL when any extent is not positive or when an
 *         allocation fails (anything allocated so far is released first)
 *
 * Release the result with Fvector3dfree3d(array, k_size, i_size).
 */
inline Fvector3d ***Fvector3dalloc3d(int i_size,int j_size,int k_size)
{
  if (i_size <= 0 || j_size <= 0 || k_size <= 0)
    {
      return NULL;
    }

  Fvector3d ***array =
    static_cast<Fvector3d ***>( calloc(k_size, sizeof(Fvector3d **)) );
  if (array == NULL)
    {
      return NULL;
    }

  for (int k = 0; k < k_size; k++)
    {
      array[k] = static_cast<Fvector3d **>( calloc(i_size, sizeof(Fvector3d *)) );
      if (array[k] == NULL)
        {
          // calloc zero-filled the outer level, so the untouched slabs are
          // still NULL and the free routine skips them.
          Fvector3dfree3d(array, k_size, i_size);
          return NULL;
        }

      for (int i = 0; i < i_size; i++)
        {
          array[k][i] = static_cast<Fvector3d *>( calloc(j_size, sizeof(Fvector3d)) );
          if (array[k][i] == NULL)
            {
              Fvector3dfree3d(array, k_size, i_size);
              return NULL;
            }
        }
    }

  return array;
}
#endif

namespace itk
{

  /**
   * Observer that prints the state of a RegularStepGradientDescentOptimizer
   * on every IterationEvent and, if a process object was registered through
   * SetRegistration(), forwards the iteration fraction to it as progress.
   */
  class CommandIterationUpdate : public itk::Command 
  {
  public:
    typedef  CommandIterationUpdate   Self;
    typedef  itk::Command             Superclass;
    typedef itk::SmartPointer<Self>  Pointer;
    itkNewMacro( Self );

  protected:
    CommandIterationUpdate() {}

    // Optional process object that receives UpdateProgress() calls.
    itk::ProcessObject::Pointer m_Registration;

  public:
    typedef itk::RegularStepGradientDescentOptimizer  OptimizerType;
    typedef   const OptimizerType   *    OptimizerPointer;

    /** Register the process object whose progress should be updated. */
    void SetRegistration( itk::ProcessObject *p)
    {
      m_Registration = p;
    }

    /** Non-const overload; forwards to the const overload. */
    void Execute(itk::Object *caller, const itk::EventObject & event)
    {
      Execute( (const itk::Object *)caller, event);
    }

    /**
     * Print iteration number, step length and metric value, then update the
     * progress of the registered process object. Events other than
     * IterationEvent, and callers that are not a
     * RegularStepGradientDescentOptimizer, are ignored.
     */
    void Execute(const itk::Object * object, const itk::EventObject & event)
    {
      if( !(itk::IterationEvent().CheckEvent( &event )) )
        {
          return;
        }

      OptimizerPointer optimizer = 
        dynamic_cast< OptimizerPointer >( object );
      if( optimizer == NULL )
        {
          // The observer was attached to some other kind of object; there is
          // nothing to report and dereferencing the pointer would crash.
          return;
        }

      std::cout << optimizer->GetCurrentIteration() << "   ";
      std::cout << optimizer->GetCurrentStepLength() << "   ";
      std::cout << optimizer->GetValue() << std::endl;
      if (m_Registration)
        {
          const double totalIterations =
            static_cast<double>(optimizer->GetNumberOfIterations());
          // Skip the update rather than report a NaN/inf fraction when the
          // optimizer is configured with zero iterations.
          if( totalIterations > 0.0 )
            {
              m_Registration->UpdateProgress( 
                static_cast<double>(optimizer->GetCurrentIteration()) /
                totalIterations );
            }
        }
    }
  };


  //
  // Constructor
  //
  // Sets a zero neighborhood radius, selects the default operating modes
  // (m_Softmode = 1, m_TPSmode = 0, m_Intensitymode = 0) and then loads the
  // remaining defaults through SetupBasicParameters().
  //
  template <class TInputImage, class TOutputImage>
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::HammerDeformableRegistrationImageFilter()
  {
    RadiusType r;
    for(unsigned int j = 0; j < InputImageDimension; j++ )
      {
        r[j] = 0;
      }
    this->SetRadius(r);

    // The modes are set before SetupBasicParameters() because that function
    // branches on m_Intensitymode and m_TPSmode.
    m_Softmode = 1; 
    m_TPSmode = 0;
    m_Intensitymode = 0;

    // SetupBasicParameters() allocates the TPS lookup table only when
    // m_TPSmode == 1, which is not the case here; give the pointer a defined
    // value instead of leaving it uninitialized.
    LookingUpTable = NULL;

    SetupBasicParameters();
  }
	

  //
  // Destructor
  //
  // Releases the calloc-allocated TPS lookup table; all other resources
  // visible here are left to their own destructors.
  //
  template <class TInputImage, class TOutputImage>
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::~HammerDeformableRegistrationImageFilter()
  {

    // release memory if variables are not itk SmartPointer's
    // The table has (2*m_BlockSize+1) entries per dimension. Null pointers
    // at the top and slab level are skipped so that a table whose allocation
    // failed (calloc returned NULL) is not dereferenced.
    // NOTE(review): the extent is recomputed from the current m_BlockSize;
    // this assumes m_BlockSize was not changed after the table was built.
    if(m_TPSmode == 1 && LookingUpTable != NULL)
      {
        const int tableLength = m_BlockSize*2+1;
        for(int i=0;i<tableLength;i++)
          {
            if(LookingUpTable[i] == NULL)
              {
                continue;
              }
            for(int j=0;j<tableLength;j++)
              {
                free(LookingUpTable[i][j]);
              }
            free(LookingUpTable[i]);
          }
        free(LookingUpTable);
        LookingUpTable = NULL;
      }
  }

  /**
   * Load the default registration parameters.
   *
   * Reads m_Intensitymode and m_TPSmode, so both must be set before the
   * call. Besides scalar defaults this
   *  - rebuilds m_SmoothNeighbor as the radius-1 search neighborhood,
   *  - fills the m_GuassianAtLevelSigma table,
   *  - clears the per-run containers and resets the affine transform to
   *    identity,
   *  - when m_TPSmode == 1, allocates the cubic lookup table with
   *    (2*m_BlockSize+1) entries per dimension and fills it through
   *    Make3DLookupTable().
   *
   * NOTE(review): calling this more than once with m_TPSmode == 1 allocates
   * a new lookup table without releasing the previous one.
   */
  template <class TInputImage, class TOutputImage>
  void
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::SetupBasicParameters()
  {
    m_IsBigVN = true;
    m_NeighborhoodStep = 3;
    m_IsYoungBrain = false;
    m_SmoothFactor = 0.5;
    m_StartSearchPoint = 0;
    m_MaximumError = 0.1;
    m_MaximumKernelWidth = 30;
    m_InitialDeformationField = true;
    m_NumberOfIterations = 50;

    if(m_Intensitymode == 0)
      {
        m_SubvolumnSimilarityThreshold = 0.6;
        m_PointMatching_Initial = 0.8;
        m_DeformRate = 0.05;
      }
    else
      {
        m_PointMatching_Initial = 0.6;
        m_DeformRate = 0.02;
        m_SubvolumnSimilarityThreshold = 0.75;
      }
    // NOTE(review): this unconditional assignment overrides the 0.75 chosen
    // just above for intensity mode. It is kept to preserve the current
    // results; confirm whether the override is intended.
    m_SubvolumnSimilarityThreshold = 0.6;
    m_SearchRadius = 12;
    m_DfmSmthCoeff = 0.5;
    m_SmoothTime = 1;
    m_Confidential = 1.0;
    m_AdditionalSmth = 3;
    m_BlockSize = 40;
    m_OverlapSize = 32;

    m_StartPercent = 0.26;
    m_CannyGuassianSigma = 1.5 ;
    m_PercentOfLowCannyEdge = 0.25 ; 
    m_MaxPercentOfAllowedDrivingVoxels = 0.65 ;
    m_FullDirectionEdge = false;

    // CreateSearchNeighbor() appends to the list it is given, so start from
    // an empty list; otherwise a second call to this function would store
    // every smoothing neighbor twice.
    m_SmoothNeighbor.clear();
    CreateSearchNeighbor(m_SmoothNeighbor, 1);

    // Row 0 (sigma == 0) is set to zero explicitly; the formula below would
    // divide by zero for it.
    for(int l=0; l<MAX_LEVEL; l++)
      {
        m_GuassianAtLevelSigma[0][l] = 0 ;
      }

    // Entry [s][l] = exp(-l^2 / (2 s^2)).
    for(int s=1; s<MAX_SIGMA; s++)
      {
        for(int l=0; l<MAX_LEVEL; l++)
          {
            m_GuassianAtLevelSigma[s][l] = exp(-l*l/(2.0*s*s)) ;
          }
      }

    this->m_SubvolumeNeighborhood.clear();
    this->m_ModelDrivingPointDisplacement.clear();
    this->m_InverseDisplacement.clear();
    this->m_SearchNeighbor.clear();
    this->m_FixedImageDrivingVoxelQualification.resize(6);
    this->m_MovingImageDrivingVoxelQualification.resize(6);

    this->m_AffineTransform = AffineTransformType::New();
    this->m_AffineTransform->SetIdentity();

    this->m_AffineInitialization = true;

    // Iteration counts for the low, mid and high resolution levels.
    this->m_Iterations[0] = 50;
    this->m_Iterations[1] = 50;
    this->m_Iterations[2] = 50;

    m_TPSBufferOccupied = false;
    if(m_TPSmode==1)
      {
        // Cubic table, Len entries per dimension, zero-filled by calloc.
        int Len = m_BlockSize*2+1;
        LookingUpTable = (float ***)calloc(Len, sizeof(float **));
        for(int i=0;i<Len;i++)
          {
            LookingUpTable[i] = (float **)calloc(Len, sizeof(float *));
          }
        for(int i=0;i<Len;i++)
          {
            for(int j=0;j<Len;j++)
              {
                LookingUpTable[i][j] = (float *)calloc(Len, sizeof(float));
              }
          }
        Make3DLookupTable(LookingUpTable, m_BlockSize);
        m_ResidualToleracne = 0.05;
        m_MaxTPSPoint = 400;
      }
  }

  /**
   * Determine the input region needed to produce the output. No extra
   * padding is requested here; the decision is delegated to the superclass.
   */
  template <class TInputImage, class TOutputImage>
  void
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::GenerateInputRequestedRegion() throw (InvalidRequestedRegionError)
  {
    // Explicitly qualified so that the superclass version is the one invoked.
    this->Superclass::GenerateInputRequestedRegion();
  }


  /**
   * Run the registration and write the deformation field into output 0.
   *
   * Throws an ITK exception when the fixed or the moving image is missing.
   * For every resolution level processed (the loop below currently visits
   * level 2 only) the function
   *   1. resamples the fixed and moving images to the level's grid,
   *   2. computes the attribute vector images,
   *   3. initialises the deformation field (zeros, or a resampled copy of
   *      the current output field),
   *   4. selects the driving voxels in both images,
   *   5. calls HAMMERMainLoop(),
   *   6. copies the level's deformation field back into the output.
   * Afterwards every output vector is converted from an index-space offset
   * into a physical-space displacement.
   *
   * NOTE(review): the function still prints diagnostics to stdout and
   * writes "fixed.hdr" / "moving.hdr" into the working directory; these look
   * like debugging aids but are kept because removing them changes observable
   * behavior.
   */
  template <class TInputImage, class TOutputImage>
  void
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::GenerateData()
  {
    int CurrentLevel;
    // Local flag (not a data member, despite the m_ prefix).
    bool m_UpsampleDeformationField;

    // ************
    // initialize
    // 1. check that FixedImage and MovingImage are set
    if (!m_FixedImage)
      {
        itkExceptionMacro(<<"FixedImage is not present");
      }

    if( !m_MovingImage )
      {
        itkExceptionMacro(<<"MovingImage is not present");
      }

    // do linear initialization if asked to do so
    // (the call is currently disabled, so this branch does nothing)
    if (this->m_AffineInitialization)
      {
        //this->LinearInitialization();
      }

    this->AllocateOutputs();
    this->m_FinalOutputDeformFld = this->GetOutput( 0 );

    /*
      Step 1. Downsample the moving and fixed image
      Step 2. Calculate the GMIs w.r.t. the current resolution
      Step 3. Initialize the deformation field or upsample from the last iteration
      Step 4. Initialize the driving voxels
      Step 5. HAMMER_Mainloop
      Step 6. Restore the deformation field
    */

    // for progress report, each iteration at mid res is weighted as 8
    // times of one iteration at low res, and high res iterations are
    // weighted as 32 times.
    itk::ProgressReporter progress(this, 0,
                                   this->m_Iterations[0] 
                                   + this->m_Iterations[1] * 8
                                   + this->m_Iterations[2] *32 );

    // Only level 2 (high resolution) is visited with these loop bounds; the
    // branches for levels 0 and 1 below are kept for when the range is widened.
    for(CurrentLevel=2;CurrentLevel<3;CurrentLevel++)
      {
        InputImagePointer                     m_CurrFixedImage;
        InputImagePointer                     m_CurrMovingImage;
        ImageAttributePointerType             m_CurrFixedImageAttribute;
        ImageAttributePointerType             m_CurrMovingImageAttribute;
        DeformationFieldPointer               m_CurrDeformationField;
        typedef itk::HammerTissueAttributeVectorImageFilter<InputImageType, ImageAttributeType> AttributeFilterType;
        typename AttributeFilterType::Pointer attributeFilter = AttributeFilterType::New();
        //for intensity hammer
        typedef itk::HammerIntensityAttributeVectorImageFilter<InputImageType, ImageAttributeType> IntensityFilterType;
        typename IntensityFilterType::Pointer attributeFilter_4I = IntensityFilterType::New();
        double scale = 3;

        m_UpsampleDeformationField = true;
        m_TPSBufferOccupied = false;
        if(CurrentLevel==0)
          {
            // std::cout<<"Processing in low resolution"<<std::endl;
            m_UpsampleDeformationField = false;
            m_NumberOfIterations = this->m_Iterations[0];
            scale = 3;
            m_CurrentResolution = 0;
            m_SearchRadius = 12;
          }
        else if(CurrentLevel==1)
          {
            // std::cout<<"Processing in mid resolution"<<std::endl;
            scale = 4;
            m_NumberOfIterations = this->m_Iterations[1];
            m_CurrentResolution = 1;
            m_SearchRadius = 10;
          }
        else if(CurrentLevel==2)
          {
            // std::cout<<"Processing in high resolution"<<std::endl;
            scale = 7;
            m_NumberOfIterations = this->m_Iterations[2];
            m_CurrentResolution = 2;
            m_SearchRadius = 8;
          }

        //since intensity hammer always use scale=3 in all resolutions
        if(m_Intensitymode == 1)
          scale =3;

        // A level with no iterations requested is skipped entirely.
        if (m_NumberOfIterations == 0)
          {
            continue;
          }
        if(m_Intensitymode ==0)
          {
            attributeFilter->SetBGValue( 0 );
            attributeFilter->SetGMValue( 150 );
            attributeFilter->SetWMValue( 250 );
            attributeFilter->SetVNValue( 50 );
            attributeFilter->SetCSFValue( 10 );

            attributeFilter->SetNumberOfThreads( 1 );
            attributeFilter->SetStrength( 1 );
            attributeFilter->SetScale( scale );
          }
        else
          {
            attributeFilter_4I->SetScale( scale );
            attributeFilter_4I->Setvariance(2.0);
            attributeFilter_4I->SetupperThreshold(0.0);
            attributeFilter_4I->SetlowerThreshold(0.0);
            attributeFilter_4I->SetPercentOfLowCannyEdge(m_PercentOfLowCannyEdge);
          }


        //for downsample the image
        typedef itk::ResampleImageFilter<InputImageType,InputImageType> DownSampleFilterType;
        typename DownSampleFilterType::Pointer DownSampleFilter = DownSampleFilterType::New();

        typedef itk::AffineTransform< double, 3 >  TransformType;
        TransformType::Pointer transform = TransformType::New();
        transform->SetIdentity();
        DownSampleFilter->SetTransform( transform );
        typedef itk::NearestNeighborInterpolateImageFunction<InputImageType, double >  InterpolatorType;
        typename InterpolatorType::Pointer nearestinterp = InterpolatorType::New();
        DownSampleFilter->SetDefaultPixelValue( 0 );
        DownSampleFilter->SetOutputSpacing( m_FixedImage->GetSpacing() );
        DownSampleFilter->SetOutputOrigin( m_FixedImage->GetOrigin() );
        DownSampleFilter->SetInterpolator(nearestinterp);
        DownSampleFilter->SetOutputDirection( m_FixedImage->GetDirection() );

        //for upsample the deformation field
        typedef itk::VectorResampleImageFilter<OutputImageType, OutputImageType >  DeformationUpSampleFilterType;

        typename DeformationUpSampleFilterType::Pointer DeformationUpSampleFilter = DeformationUpSampleFilterType::New();
        typedef itk::VectorLinearInterpolateImageFunction< 
          OutputImageType, double >  DeformationFieldInterpolatorType;
        typename DeformationFieldInterpolatorType::Pointer DeformationFieldInterpolator = DeformationFieldInterpolatorType::New();
        DeformationUpSampleFilter->SetInterpolator( DeformationFieldInterpolator );
        typedef itk::IdentityTransform< double, InputImageDimension >  IdentityTransformType;
        typename IdentityTransformType::Pointer IdentityTransform = IdentityTransformType::New();
        DeformationUpSampleFilter->SetTransform( IdentityTransform );
        DeformationVectorType BlackValue;
        BlackValue.Fill(0);
        DeformationUpSampleFilter->SetDefaultPixelValue( BlackValue );
        DeformationUpSampleFilter->SetOutputSpacing( m_FixedImage->GetSpacing() );
        DeformationUpSampleFilter->SetOutputOrigin( m_FixedImage->GetOrigin() );
        DeformationUpSampleFilter->SetOutputDirection(m_FixedImage->GetDirection());

        //Step 1. Downsample the moving and fixed image
        // The size is halved once per level below the finest one (level 2).
        SizeType FullSize = m_FixedImage->GetLargestPossibleRegion().GetSize();
        for(int s=0;s<InputImageDimension;s++)
          FullSize[s] = (((int)FullSize[s])>>(2-CurrentLevel));
        DownSampleFilter->SetSize( FullSize );

        // modify spacing accordingly
        typename InputImageType::SpacingType spacing = m_FixedImage->GetSpacing();
        double factor = pow(2.0, 2.0-CurrentLevel);
        for (int k = 0; k < 3; k++)
          {
            spacing[k] *= factor;
          }
        DownSampleFilter->SetOutputSpacing( spacing );

        // std::cout << "Size: " << FullSize << std::endl;
        // std::cout << "Spacing: " << spacing << std::endl;

        DownSampleFilter->SetOutputOrigin( m_FixedImage->GetOrigin() );

        DownSampleFilter->SetInput(m_FixedImage);    
        DownSampleFilter->Update();
        m_CurrFixedImage = DownSampleFilter->GetOutput();
        // Detach the result so that re-running the filter on the moving
        // image does not overwrite it.
        m_CurrFixedImage->DisconnectPipeline();

        DownSampleFilter->SetInput(m_MovingImage);
        typename AffineTransformType::Pointer idTransform = AffineTransformType::New();
        idTransform->SetIdentity();
        DownSampleFilter->SetTransform( idTransform );
        DownSampleFilter->Update();
        m_CurrMovingImage = DownSampleFilter->GetOutput();
        m_CurrMovingImage->DisconnectPipeline();

        //std::cout << m_CurrFixedImage->GetLargestPossibleRegion() << std::endl;
        //std::cout << m_CurrMovingImage->GetLargestPossibleRegion() << std::endl;
        // Diagnostic: count the fixed-image voxels whose value is 250 (the
        // value passed to SetWMValue above in tissue mode).
        ImageRegionConstIteratorWithIndex<InputImageType> src( m_CurrFixedImage, m_CurrFixedImage->GetLargestPossibleRegion() );
        int voxel_num=0;
        for(src.GoToBegin();!src.IsAtEnd();++src)
          {
            //printf("%d ", src.Get());
            if(src.Get()==250)
              voxel_num++;
          }
        printf("voxel_num=%d\n", voxel_num);

        //m_CurrMovingImage->Print( std::cout );
        // NOTE(review): writes the resampled images into the current working
        // directory on every run.
        typename itk::ImageFileWriter<InputImageType>::Pointer dummywriter = itk::ImageFileWriter<InputImageType>::New();
        dummywriter->SetFileName( "fixed.hdr" );
        dummywriter->SetInput( m_CurrFixedImage );
        dummywriter->Update();

        dummywriter->SetFileName( "moving.hdr" );
        dummywriter->SetInput( m_CurrMovingImage );
        dummywriter->Update();

        //Step 2. Calculate the GMIs w.r.t. the current resolution
        //std::cout << m_CurrFixedImage->GetLargestPossibleRegion() << std::endl;
        if(m_Intensitymode == 0)
          {
            attributeFilter->SetInput( m_CurrFixedImage );
            attributeFilter->Update();
            //std::cout << m_CurrFixedImage->GetLargestPossibleRegion() << std::endl;
            m_CurrFixedImageAttribute =  attributeFilter->GetOutput() ;
            m_CurrFixedImageAttribute->DisconnectPipeline();
          }
        else
          {
            attributeFilter_4I->SetInput( m_CurrFixedImage );
            attributeFilter_4I->Update();
            m_CurrFixedImageAttribute =  attributeFilter_4I->GetOutput() ;
            m_FixedImageDrivingVoxelQualification[2] = attributeFilter_4I->GetCannyMinimalRequiredEdgeValue();
            m_CurrFixedImageAttribute->DisconnectPipeline();
            SearchThresholdForObtainingTopXPercentOfDrivingVoxels(m_CurrFixedImageAttribute, m_FixedImageDrivingVoxelQualification, m_StartPercent);
          }

        if(m_Intensitymode == 0)
          {
            attributeFilter->SetInput( m_CurrMovingImage );
            attributeFilter->Update();
            m_CurrMovingImageAttribute = attributeFilter->GetOutput();
            m_CurrMovingImageAttribute->DisconnectPipeline();
          }
        else
          {
            attributeFilter_4I->SetInput( m_CurrMovingImage );
            attributeFilter_4I->Update();
            m_CurrMovingImageAttribute = attributeFilter_4I->GetOutput();
            m_MovingImageDrivingVoxelQualification[2] = attributeFilter_4I->GetCannyMinimalRequiredEdgeValue();
            m_MdlCannyMinRecord = m_MovingImageDrivingVoxelQualification[2];
            m_CurrMovingImageAttribute->DisconnectPipeline();
            SearchThresholdForObtainingTopXPercentOfDrivingVoxels(m_CurrMovingImageAttribute, m_MovingImageDrivingVoxelQualification, m_StartPercent);
          }

        //std::cout << "Step 3. Initialize the deformation field or upsample from the last iteration\n";

        //Step 3. Initialize the deformation field or upsample from the last iteration
        RegionType dummyRegion = m_CurrFixedImage->GetLargestPossibleRegion();
        // std::cout << dummyRegion << std::endl;
        m_CurrDeformationField = DeformationFieldType::New();
        m_CurrDeformationField->CopyInformation( m_CurrFixedImage );
        m_CurrDeformationField->SetRegions(dummyRegion);
        m_CurrDeformationField->Allocate();
        // Disabled: conversion of the affine transform into an initial
        // deformation field.
        //if (this->m_AffineInitialization)
        if (0)
          {
            std::cout << "Affine initialization done:\n" ;
            this->m_AffineTransform->Print( std::cout );
            this->m_AffineInitialization = false;
            m_UpsampleDeformationField = true;
            m_InitialDeformationField = true;

            // convert affine transform into a deformation field
            itk::ImageRegionIteratorWithIndex<OutputImageType> itD( m_FinalOutputDeformFld, m_FinalOutputDeformFld->GetRequestedRegion() );
            std::cout << "m_FinalOutputDeformFld->GetRegion():\n";
            std::cout << m_FinalOutputDeformFld->GetLargestPossibleRegion() << std::endl;
            for(itD.GoToBegin();!itD.IsAtEnd();++itD)
              {  
                typename DeformationFieldType::IndexType idx = itD.GetIndex();
                typename DeformationFieldType::PointType pt0 ;

                m_FinalOutputDeformFld->TransformIndexToPhysicalPoint( idx, pt0 );

                typename AffineTransformType::InputPointType ipt;

                for (unsigned int k = 0; k < 3; k++)
                  {
                    ipt[k] =  pt0[k];
                  }
                typename AffineTransformType::OutputPointType opt = this->m_AffineTransform->TransformPoint( ipt );
                typename DeformationFieldType::PixelType dvec;
                for (unsigned int k = 0; k < 3; k++)
                  {
                    dvec[k] =  opt[k]-ipt[k];
                  }
                itD.Set( dvec );
              }
          }

        if(m_UpsampleDeformationField == true && m_InitialDeformationField == true)
          {
            //upsample the deformation field from last resolution
            // printf("Upsample...\n");
            // A half-size copy of the output field is filled in iteration
            // order and then resampled to the current grid.
            DeformationFieldPointer m_CurrDeformFld_Copy = DeformationFieldType::New();
            RegionType dummydeformfieldregion = m_CurrFixedImage->GetLargestPossibleRegion();
            SizeType dummysize=dummydeformfieldregion.GetSize();
            for(int s=0;s<InputImageDimension;s++)
              dummysize[s]/=2;
            dummydeformfieldregion.SetSize(dummysize);
            m_CurrDeformFld_Copy->CopyInformation(m_CurrFixedImage);
            m_CurrDeformFld_Copy->SetRegions(dummydeformfieldregion);
            DeformationUpSampleFilter->SetSize(FullSize);
            m_CurrDeformFld_Copy->Allocate();
            DeformationFieldIteratorType DeformFld_Iter_Copy(m_CurrDeformFld_Copy, m_CurrDeformFld_Copy->GetRequestedRegion());
            DeformationFieldIteratorType DeformFld_Iter(m_FinalOutputDeformFld, m_FinalOutputDeformFld->GetRequestedRegion());
            for(DeformFld_Iter.GoToBegin(),DeformFld_Iter_Copy.GoToBegin();!DeformFld_Iter_Copy.IsAtEnd();++DeformFld_Iter,++DeformFld_Iter_Copy)
              {  
                DeformFld_Iter_Copy.Set(DeformFld_Iter.Get());
              }

            DeformationUpSampleFilter->SetInput(m_CurrDeformFld_Copy);
            DeformationUpSampleFilter->Update();
            m_CurrDeformationField = DeformationUpSampleFilter->GetOutput();
            m_CurrDeformationField->DisconnectPipeline();
            DeformationFieldIteratorType CurrDeformFld_Iter(m_CurrDeformationField, m_CurrDeformationField->GetRequestedRegion());
            DeformationVectorType Deform_Last_Res;
            // Every resampled vector is scaled by (1 + m_Confidential).
            float deform_ratio = ( 1.0 + m_Confidential ) ;
            for(CurrDeformFld_Iter.GoToBegin();!CurrDeformFld_Iter.IsAtEnd();++CurrDeformFld_Iter)
              {
                Deform_Last_Res = CurrDeformFld_Iter.Get();
                for(int s =0;s<InputImageDimension;s++)
                  Deform_Last_Res[s] = Deform_Last_Res[s]*deform_ratio;
                CurrDeformFld_Iter.Set(Deform_Last_Res);
              }
          }
        else
          {
            //Fill 0
            // printf("Initilize with zeros...\n");
            DeformationFieldIteratorType DeformFld_Iter(m_CurrDeformationField, m_CurrDeformationField->GetRequestedRegion());
            DeformationVectorType Deform_Zero;
            for(int k=0;k<InputImageDimension;k++)
              Deform_Zero[k] = 0;
            for(DeformFld_Iter.GoToBegin();!DeformFld_Iter.IsAtEnd();++DeformFld_Iter)
              {
                DeformFld_Iter.Set(Deform_Zero);
              }
          }

        //Step 4. Initialize the driving voxels

        /*
          MdlThreshold.Geom_UP = 0.9*255 ;
          MdlThreshold.Geom_DN = 0.4*255 ;
          MdlThreshold.VNvlm_UP = 255/12 ;
          MdlThreshold.VNvlm_DN = 170 ; 
          MdlThreshold.CSFBG_UP = 255/4 ;
        */
        // In intensity mode the qualification values were already set in
        // Step 2, so they are only assigned here in tissue mode.
        if(m_Intensitymode == 0)
          {
            m_FixedImageDrivingVoxelQualification[0] = (0.9*255);               //Geom_Up
            m_FixedImageDrivingVoxelQualification[1] = (0.4*255);               //Geom_DN
            m_FixedImageDrivingVoxelQualification[2] = (0.4*255/1.1);           //Gemo_DNUP
            m_FixedImageDrivingVoxelQualification[3] = (255/12);                //VNvlm_UP (integer division)
            if(m_Softmode==1)
              m_FixedImageDrivingVoxelQualification[4] = 250;
            else
              m_FixedImageDrivingVoxelQualification[4] = 170;
            m_FixedImageDrivingVoxelQualification[5] = (255/4);                 //CSFBG_UP (integer division)
          }

        IdentifyDrivingVoxelsInFixedImage(m_CurrFixedImageAttribute, m_FixedImageDrivingVoxelQualification);
        std::cout<<"Driving voxel in template image is "<<m_PickedPointsOnFixedImage.size()<<std::endl;
        // std::cout<<"Driving voxel in model image is "<<m_PickedPointsOnFixedImage.size()<<std::endl;
        //     for(int k=0;k<m_PickedPointsOnFixedImage.size();k++)
        //       std::cout<<m_PickedPointsOnFixedImage[k]<<std::endl;
        /*
          alpha = 0.9 ; 
          beta  = 0.4;
          ObjThreshold.Geom_UP = alpha*255 ;
          ObjThreshold.Geom_DN = beta*255 ;
          Geom_DownsUp = ObjThreshold.Geom_DN/1.1 ; 
          ObjThreshold.VNvlm_UP = 255/4 ;  
          ObjThreshold.CSFBG_UP = 255/4 ;
        */

        if(m_Intensitymode == 0)
          {
            m_MovingImageDrivingVoxelQualification[0] = (0.9*255);              //ObjThreshold.Geom_UP
            m_MovingImageDrivingVoxelQualification[1] = (0.4*255);              //ObjThreshold.Geom_DN
            m_MovingImageDrivingVoxelQualification[2] = (0.4*255/1.1);          //Geom_DownsUp
            m_MovingImageDrivingVoxelQualification[3] = (255/4);                //ObjThreshold.VNvlm_UP
            m_MovingImageDrivingVoxelQualification[4] = (250);
            m_MovingImageDrivingVoxelQualification[5] = (255/4);                //ObjThreshold.CSFBG_UP
          }

        IdentifyDrivingVoxelsInMovingImage(m_CurrMovingImageAttribute, m_MovingImageDrivingVoxelQualification);
        std::cout<<"Driving voxel in subject image is "<<m_PickedPointsOnMovingImage.size()<<std::endl;
        m_InverseDisplacement.clear();
        m_ModelDrivingPointDisplacement.clear();
        m_ModelAttributeVector.clear();
        m_InverseDisplacement.resize(m_PickedPointsOnFixedImage.size());
        m_ModelDrivingPointDisplacement.resize(m_PickedPointsOnFixedImage.size());

        // Each driving point starts at its own index (no displacement) with a
        // zero inverse displacement; its attribute vector is cached.
        for(unsigned int k=0;k<m_PickedPointsOnFixedImage.size();k++)
          {
            //DeformationVectorType dfm = m_CurrDeformationField->GetPixel(m_PickedPointsOnFixedImage[k]);
            for(int s=0;s<InputImageDimension;s++)
              {  
                m_ModelDrivingPointDisplacement[k][s] = m_PickedPointsOnFixedImage[k][s];// + dfm[s];
                m_InverseDisplacement[k][s] = 0;
              }
            AttributeVectorType Feature = m_CurrFixedImageAttribute->GetPixel(m_PickedPointsOnFixedImage[k]);
            m_ModelAttributeVector.push_back(Feature);
          }

        //step 4.2 Initialize the Control Point for TPS interpolation
        if(m_TPSmode ==1)
          {
            ControlPointList.clear();
            ControlPointDeform.clear();
            ControlPointNum = m_PickedPointsOnFixedImage.size();
            // ControlPointList is filled with push_back below, so it must
            // not be pre-sized: resizing first left ControlPointNum
            // default-constructed entries in front of the real control
            // points (2*ControlPointNum entries in total).
            ControlPointDeform.resize(ControlPointNum);
            for(int k=0;k<ControlPointNum;k++)
              {
                ControlPointList.push_back(m_PickedPointsOnFixedImage[k]);
              }
          }

        //Step 5. HAMMER_Mainloop
        HAMMERMainLoop(m_CurrFixedImageAttribute, m_CurrMovingImageAttribute, m_CurrDeformationField, progress);
        // std::cout << "dummyRegion: \n" << dummyRegion << std::endl;

        //Step 6. Restore the deformation field    
        DeformationFieldIteratorType DeformFld_Iter(m_CurrDeformationField, m_CurrDeformationField->GetRequestedRegion());
        DeformationFieldIteratorType DeformFld_Iter_Copy(m_FinalOutputDeformFld, m_FinalOutputDeformFld->GetRequestedRegion());
        for(DeformFld_Iter.GoToBegin(),DeformFld_Iter_Copy.GoToBegin();!DeformFld_Iter.IsAtEnd();++DeformFld_Iter,++DeformFld_Iter_Copy)
          {
            DeformFld_Iter_Copy.Set(DeformFld_Iter.Get());
          }
      }//end of Cur_Level

    //warp the moving image
    /*
      printf("Generate the result\n");
      InputImagePointer WarpedImage = InputImageType::New();
      WarpedImage->CopyInformation(m_FixedImage);
      WarpedImage->SetRegions(m_FixedImage->GetLargestPossibleRegion());
      WarpedImage->Allocate();

      DeformationFieldIteratorType DeformFld_Iter_Dump(m_FinalOutputDeformFld, m_FinalOutputDeformFld->GetRequestedRegion());
      InputImageIteratorType WarpedImage_Iter(WarpedImage, WarpedImage->GetLargestPossibleRegion());
      DeformFld_Iter_Dump.GoToBegin();
      WarpedImage_Iter.GoToBegin();
      for( ; !DeformFld_Iter_Dump.IsAtEnd();)
      {        
      DeformationVectorType dumpdfm = DeformFld_Iter_Dump.Get();
      IndexType index = DeformFld_Iter_Dump.GetIndex();
      InputPixelType MovingValue = 0; 
      for(int s=0;s<InputImageDimension;s++)
      {
      index[s] += (int)(dumpdfm[s]+0.5);
      }
      if( m_MovingImage->GetLargestPossibleRegion().IsInside(index) )
      {
      MovingValue = m_MovingImage->GetPixel(index);
      }
      WarpedImage_Iter.Set(MovingValue);
      ++DeformFld_Iter_Dump;
      ++WarpedImage_Iter;
      }

      typedef itk::ImageFileWriter<InputImageType> ImageWriterType;
      typename ImageWriterType::Pointer writer = ImageWriterType::New();
      writer->SetFileName("warpresult.hdr");
      writer->SetInput(WarpedImage);
      writer->Update();
    */

    // Convert every vector from an index-space offset into a physical
    // displacement: map the voxel index and the displaced continuous index
    // to physical points and store their difference.
    itk::ImageRegionIteratorWithIndex<DeformationFieldType>
      itDf(m_FinalOutputDeformFld, m_FinalOutputDeformFld->GetRequestedRegion());
    for ( itDf.GoToBegin(); !itDf.IsAtEnd(); ++itDf )
      {
        typename DeformationFieldType::PixelType vec = itDf.Get();
        typename DeformationFieldType::IndexType idx = itDf.GetIndex();
        typename DeformationFieldType::PointType pt0;
        m_FinalOutputDeformFld->TransformIndexToPhysicalPoint( idx, pt0 );
        itk::ContinuousIndex<double, 3> cIdx;
        for (unsigned int k = 0; k < 3; k++)
          {
            cIdx[k] = idx[k]+vec[k];
          }
        typename DeformationFieldType::PointType pt1;
        m_FinalOutputDeformFld->TransformContinuousIndexToPhysicalPoint( cIdx, pt1 );
        for (unsigned int k = 0; k < 3; k++)
          {
            vec[k] = pt1[k]-pt0[k];
          }
        itDf.Set( vec );
      }

    return;
  }

  /**
   * Print the state of the filter. No members of this class are reported;
   * the call is forwarded to the superclass unchanged.
   */
  template <class TInputImage, class TOutputImage>
  void
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::PrintSelf(std::ostream& os, Indent indent) const
  {
    this->Superclass::PrintSelf( os, indent );
  }

  /**
   * Build the neighborhood used for point matching.
   *
   * Neighbor is emptied first and then filled by CreateSearchNeighbor()
   * with the given Radius.
   **/
  template <class TInputImage, class TOutputImage>
  void 
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::CreatePointMatchingNeighbor(IndexArrayType &Neighbor, int Radius)
  {
    // CreateSearchNeighbor() only appends, so drop any previous content.
    Neighbor.clear();
    this->CreateSearchNeighbor(Neighbor, Radius);
  }

  /**
   * Build the neighborhood used for subvolume deformation.
   *
   * Neighbor is emptied first and then filled by
   * CalculateNeighborhoodbyIncreasingRadius() with the given Radius.
   */
  template <class TInputImage, class TOutputImage>
  void 
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::CreateSubvolumnNeighbor(IndexArrayType &Neighbor, int Radius)
  {
    // The shell generator only appends, so drop any previous content.
    Neighbor.clear();
    this->CalculateNeighborhoodbyIncreasingRadius(Neighbor, Radius);
  }

  /**
   * Append neighborhood offsets to Neighbor, ordered from the center
   * outwards.
   *
   * The zero offset is appended first. Then, for every shell r = 1 ..
   * Radius/2 (integer division), the offsets on the surface of the cube
   * [-r, r]^3 are appended: the two z faces in full, then the two x faces
   * without their z edges, then the two y faces without their z and x edges,
   * so each surface offset is produced exactly once. Components 0, 1 and 2
   * of the index are written explicitly, i.e. the code assumes 3-D indices.
   * Neighbor is not cleared.
   **/
  template <class TInputImage, class TOutputImage>
  void
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::CalculateNeighborhoodbyIncreasingRadius(IndexArrayType &Neighbor, int Radius)  throw (InvalidRequestedRegionError) 
  {
    IndexType offset;
    for(int d=0; d<InputImageDimension; d++)
      {
        offset[d] = 0;
      }
    Neighbor.push_back(offset);

    const int outermostShell = Radius/2;
    for(int shell=1; shell<=outermostShell; ++shell)
      {
        // Interior range shared by the partially covered faces.
        const int innerLow  = -shell+1;
        const int innerHigh =  shell-1;

        // Faces z == -shell and z == +shell, full x/y extent.
        for(int side=0; side<2; ++side)
          {
            const int z = (side==0) ? -shell : shell;
            for(int x=-shell; x<=shell; ++x)
              {
                for(int y=-shell; y<=shell; ++y)
                  {
                    offset[0] = y;
                    offset[1] = x;
                    offset[2] = z;
                    Neighbor.push_back(offset);
                  }
              }
          }

        // Faces x == -shell and x == +shell, z restricted to the interior.
        for(int side=0; side<2; ++side)
          {
            const int x = (side==0) ? -shell : shell;
            for(int z=innerLow; z<=innerHigh; ++z)
              {
                for(int y=-shell; y<=shell; ++y)
                  {
                    offset[0] = y;
                    offset[1] = x;
                    offset[2] = z;
                    Neighbor.push_back(offset);
                  }
              }
          }

        // Faces y == -shell and y == +shell, z and x restricted to the interior.
        for(int side=0; side<2; ++side)
          {
            const int y = (side==0) ? -shell : shell;
            for(int z=innerLow; z<=innerHigh; ++z)
              {
                for(int x=innerLow; x<=innerHigh; ++x)
                  {
                    offset[0] = y;
                    offset[1] = x;
                    offset[2] = z;
                    Neighbor.push_back(offset);
                  }
              }
          }
      }
  }

  /**
   * Append every index of the box [-Radius, Radius] (in each dimension) to
   * Neighbor.
   *
   * The indices are produced by walking an image region with an
   * ImageRegionIteratorWithIndex, so they appear in that iterator's order.
   * The image used for this is never allocated; only the iterator's index
   * is read. Neighbor is not cleared.
   **/
  template <class TInputImage, class TOutputImage>
  void
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::CreateSearchNeighbor(IndexArrayType &Neighbor, int Radius)
  {  
    // The same half-width is used along every axis.
    Size<InputImageDimension> halfWidth;
    for (int d = 0; d < InputImageDimension; ++d)
      {
        halfWidth[d] = static_cast<unsigned long>(Radius);
      }

    SpacingType unitSpacing;
    for (int d = 0; d < InputImageDimension; ++d)
      {
        unitSpacing[d] = 1;
      }

    // Box centred on the zero index, 2*halfWidth+1 samples per axis.
    IndexType boxCorner;
    SizeType boxExtent;
    PointType zeroOrigin;
    for (int d = 0; d < InputImageDimension; ++d)
      {
        boxExtent[d] = halfWidth[d]+halfWidth[d]+1;
        boxCorner[d] = -halfWidth[d];
        zeroOrigin[d] = 0;
      }
    RegionType box;
    box.SetIndex( boxCorner );
    box.SetSize( boxExtent );

    // Scratch image that only carries the geometry for the iterator.
    InputImagePointer scratchImage = InputImageType::New();
    scratchImage->SetSpacing( unitSpacing );
    scratchImage->SetRegions( box );
    scratchImage->SetOrigin( zeroOrigin );

    itk::ImageRegionIteratorWithIndex<InputImageType> walker( scratchImage, box );
    walker.GoToBegin();
    while (!walker.IsAtEnd())
      {
        Neighbor.push_back(walker.GetIndex());
        ++walker;
      }
  }


  /**
   * Similarity metric between two attribute vectors.
   *
   * Delegates to Template_Feature.ComputeSimilarity(), passing the subject
   * feature, the member threshold m_DIF_THR and the inSubVMatch flag.
   **/
  template <class TInputImage, class TOutputImage>
  float
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::SimilarityBetweenTwoImgAttribute(AttributeVectorType &Template_Feature, AttributeVectorType &Subject_Feature, bool inSubVMatch) const
  {
    const float similarity =
      Template_Feature.ComputeSimilarity(Subject_Feature, m_DIF_THR, inSubVMatch);
    return similarity;
  }

  /**
   * Compute the magnitude of a deformation vector.
   *
   * Returns Deform_Vector.GetNorm() converted to float.
   */
  template <class TInputImage, class TOutputImage>
  float 
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>  
  ::ComputeVectorMagnitude(DeformationVectorType Deform_Vector) const
  {
    const float magnitude = Deform_Vector.GetNorm();
    return magnitude;
  }

  /**
   * The core function which determines the correspondence for one model
   * driving voxel.
   *
   * \param FixedAttributeImage   attribute image of the fixed (template) image
   * \param MovingAttributeImage  attribute image of the moving (subject) image
   * \param DeformFld             current deformation field
   * \param DrivingPointIndex     position of the driving voxel in
   *                              m_PickedPointsOnFixedImage / m_InverseDisplacement
   * \param DeformFld_Last        deformation field of the previous round
   * \param DeformationUpdate     [out] proposed displacement update; zero if
   *                              no candidate was accepted
   * \param SearchRadius          half-width of the cubic search window
   * \param Step                  stride used when scanning the search window
   *
   * \return 10000.0 when the update is taken from the inverse displacement
   *         (step 1); otherwise the best sub-volume similarity found in
   *         step 2 (0 when no candidate was accepted).
   *
   * The index arithmetic addresses components 0..2 explicitly, so the
   * function is written for three-dimensional images.
   */
  template <class TInputImage, class TOutputImage>
  float 
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::DetermineCorrespondenceOnOneDrivingVoxel(ImageAttributePointerType &FixedAttributeImage, ImageAttributePointerType &MovingAttributeImage, DeformationFieldPointer &DeformFld, int DrivingPointIndex, DeformationFieldPointer DeformFld_Last, DeformationVectorType &DeformationUpdate, int SearchRadius, int Step) const
  {
    RegionType dummyRegion = FixedAttributeImage->GetLargestPossibleRegion();
    AttributeVectorType TemplateFeature, SubjectFeature;
    DeformationVectorType DeformationOnParticularPoint, TentativeWarp;
    DeformationUpdate.Fill( 0 );

    // Sampling stride handed to SubVolumnMatching. A neighborhood with fewer
    // than 20 entries would give 0, which makes the callee divide by zero
    // and never advance its loop, so clamp it to 1.
    int SmplStep = m_SubvolumeNeighborhood.size()/20;
    if(SmplStep < 1)
      {
      SmplStep = 1;
      }

    // A non-positive stride would make the search loops below spin forever.
    if(Step < 1)
      {
      Step = 1;
      }

    IndexType ImageIndex = m_PickedPointsOnFixedImage[DrivingPointIndex];

    //Step 1: check whether the input point has the inverse force
    const float InverseForceMagnitude = ComputeVectorMagnitude(m_InverseDisplacement[DrivingPointIndex]);
    if(InverseForceMagnitude>0 && m_IterationRatio<=ITER_THRD)
      {
      DeformationVectorType CurrentDeformation, PreviousDeformation;
      CurrentDeformation = DeformFld->GetPixel(ImageIndex);
      PreviousDeformation = DeformFld_Last->GetPixel(ImageIndex);
      DeformationUpdate = PreviousDeformation - CurrentDeformation + m_InverseDisplacement[DrivingPointIndex]*(1.0+(1.0 + m_IterationRatio))/2.0;

      // Limit the update length to half of the sub-volume radius.
      const float UpdateMagnitude = ComputeVectorMagnitude(DeformationUpdate);
      if(UpdateMagnitude > (m_SubvolumeRadius>>1))
        {
        DeformationUpdate = DeformationUpdate/UpdateMagnitude*(m_SubvolumeRadius>>1);
        }
      return 10000.0 ;
      }

    //Step 2: find the correspondence of particular point
    float MaxSimilarityDegree = 0;
    float MinDistance;
    float MinDistThresh = 10000.0;
    TentativeWarp.Fill(0);
    TemplateFeature = FixedAttributeImage->GetPixel(ImageIndex);
    DeformationOnParticularPoint = DeformFld->GetPixel(ImageIndex);

    for(int i=-SearchRadius;i<=SearchRadius;i+=Step)
      for(int j=-SearchRadius;j<=SearchRadius;j+=Step)
        for(int k=-SearchRadius;k<=SearchRadius;k+=Step)
          {
          // Candidate location: current warped position plus the tentative
          // offset. Note that j feeds component 0 and i feeds component 1.
          IndexType SubjIdx;
          SubjIdx[0] = int(DeformationOnParticularPoint[0] + ImageIndex[0] + j + 0.5);
          SubjIdx[1] = int(DeformationOnParticularPoint[1] + ImageIndex[1] + i + 0.5);
          SubjIdx[2] = int(DeformationOnParticularPoint[2] + ImageIndex[2] + k + 0.5);

          if(!dummyRegion.IsInside(SubjIdx))
            {
            continue;
            }

          SubjectFeature = MovingAttributeImage->GetPixel(SubjIdx);
          const float PointSimilarity = SimilarityBetweenTwoImgAttribute(TemplateFeature, SubjectFeature, false);

          // When m_Intensitymode is non-zero the two voxels must also agree
          // on their CSF/background attribute (both > 0 or both == 0).
          if(m_Intensitymode != 0)
            {
            const bool BothCSF = TemplateFeature.GetCSFBackground()>0 && SubjectFeature.GetCSFBackground()>0;
            const bool NeitherCSF = TemplateFeature.GetCSFBackground()==0 && SubjectFeature.GetCSFBackground()==0;
            if(!(BothCSF || NeitherCSF))
              {
              continue;
              }
            }

          //Step 2.2: compare the similarity between two neighborhood
          const bool Candidate = PointSimilarity>m_PointMatchingThreshold || (m_IsBigVN== true && TemplateFeature.GetVentricleVolume()>0 && SubjectFeature.GetVentricleVolume()>0);
          if(!Candidate)
            {
            continue;
            }

          TentativeWarp[0] = j;
          TentativeWarp[1] = i;
          TentativeWarp[2] = k;
          const float NeighborhoodSimilarity = SubVolumnMatching(FixedAttributeImage, MovingAttributeImage, DeformFld, ImageIndex, TentativeWarp, m_SubvolumeNeighborhood, SmplStep, &MinDistance, MinDistThresh);
          if(NeighborhoodSimilarity>MaxSimilarityDegree)
            {
            // Keep the best candidate and tighten the early-exit threshold
            // used by subsequent SubVolumnMatching calls.
            DeformationUpdate = TentativeWarp;
            MaxSimilarityDegree = NeighborhoodSimilarity;
            MinDistThresh = MinDistance;
            }
          }
    return ( MaxSimilarityDegree );
  }


  /**
   * Compare the attribute sub-volume around ImageIndex in the fixed image
   * with the sub-volume it maps to in the moving image under a tentative
   * extra displacement.
   *
   * \param FixedAttributeImage   attribute image of the fixed image
   * \param MovingAttributeImage  attribute image of the moving image
   * \param DeformFld             current deformation field
   * \param ImageIndex            centre of the sub-volume in the fixed image
   * \param TentativeWarp         extra displacement under test; it is scaled
   *                              per neighbor by a weight looked up in
   *                              m_GuassianAtLevelSigma
   * \param CertainNeighborhood   index offsets that make up the sub-volume
   * \param NeighborhoodStep      stride used to subsample CertainNeighborhood;
   *                              values below 1 are treated as 1
   * \param MinDist               [out] accumulated dissimilarity (DistSeg)
   * \param MinDist_Threshold     the scan stops as soon as the accumulated
   *                              dissimilarity exceeds this value
   *
   * \return accumulated similarity divided by the weighted number of
   *         neighbors that landed inside the moving image (edge voxels of
   *         the template count 1.2, all others 1).
   *
   * NOTE(review): CertainNeighborhood is taken by value, so the container is
   * copied on every call; changing that requires touching the declaration.
   */
  template <class TInputImage, class TOutputImage>
  float 
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::SubVolumnMatching(ImageAttributePointerType &FixedAttributeImage, ImageAttributePointerType &MovingAttributeImage, DeformationFieldPointer &DeformFld, IndexType &ImageIndex, DeformationVectorType TentativeWarp, IndexArrayType CertainNeighborhood, int NeighborhoodStep, float *MinDist, float MinDist_Threshold) const
  {
    float NeighborhoodSimilarity, CurrentSimilarity, DisplacementMagnitude;
    // Weighted sample count. It must be a float: the 0.2 added for edge
    // voxels below is lost entirely when this is an integer.
    float RealSize = 0;
    AttributeVectorType SubvolumnTemplateFeature, SubvolumnSubjectFeature;
    IndexType MdlIdx, SubjIdx;
    DeformationVectorType DeformationOnParticularPoint, CurrLocation;
    RegionType dummyFixedImageRegion, dummyMovingImageRegion;
    float DistSeg, Overall_Count;

    // A non-positive stride would divide by zero just below and would never
    // advance the loop counter.
    if(NeighborhoodStep < 1)
      {
      NeighborhoodStep = 1;
      }

    NeighborhoodSimilarity = 0;
    DistSeg = 0;
    Overall_Count = CertainNeighborhood.size()/NeighborhoodStep;
    dummyFixedImageRegion = FixedAttributeImage->GetLargestPossibleRegion();
    dummyMovingImageRegion = MovingAttributeImage->GetLargestPossibleRegion();
    for(unsigned int SubvolumnIter=0; SubvolumnIter < CertainNeighborhood.size(); SubvolumnIter += NeighborhoodStep)
      {
      // Mean absolute offset of this neighbor; used as the column index into
      // the Gaussian weight table.
      DisplacementMagnitude = 0;
      for(int s=0;s<InputImageDimension;s++)
        {
        MdlIdx[s] = ImageIndex[s] + CertainNeighborhood[SubvolumnIter][s];
        DisplacementMagnitude += abs(CertainNeighborhood[SubvolumnIter][s]);
        }
      DisplacementMagnitude /= InputImageDimension;
      if(!dummyFixedImageRegion.IsInside(MdlIdx))
        {
        continue;
        }

      SubvolumnTemplateFeature = FixedAttributeImage->GetPixel(MdlIdx);        
      DeformationOnParticularPoint = DeformFld->GetPixel(MdlIdx);

      // Warp the neighbor: current deformation plus the weighted tentative warp.
      for(int s=0;s<InputImageDimension;s++)
        {
        CurrLocation[s] =  MdlIdx[s]+ DeformationOnParticularPoint[s] + TentativeWarp[s]*m_GuassianAtLevelSigma[m_SubvolumeRadius][(int)DisplacementMagnitude];
        SubjIdx[s] = (int)(CurrLocation[s] + 0.5);
        }

      if(!dummyMovingImageRegion.IsInside(SubjIdx))
        {
        // Neighbors that leave the moving image contribute no similarity and
        // are not counted in RealSize.
        CurrentSimilarity = 0;
        }
      else
        {
        SubvolumnSubjectFeature = MovingAttributeImage->GetPixel(SubjIdx);
        CurrentSimilarity = SimilarityBetweenTwoImgAttribute(SubvolumnSubjectFeature, SubvolumnTemplateFeature, m_Intensitymode == 1);
        if(CurrentSimilarity < 0.6 && m_IsYoungBrain == true && SubvolumnTemplateFeature.GetEdge() == 0)
          {
          CurrentSimilarity = 0;
          }

        RealSize += 1.0f;
        if( SubvolumnTemplateFeature.GetEdge()>0 ) /* June 6, 2001*/
          {
          // Edge voxels weigh 1.2 in both the numerator and the denominator.
          CurrentSimilarity *= 1.2 ;
          RealSize += (1.2-1.0) ;
          }
        }
      DistSeg += (1-CurrentSimilarity)/Overall_Count;
      if( DistSeg>MinDist_Threshold) 
        break ; /* no need to continue on this selected deformation */
      NeighborhoodSimilarity += CurrentSimilarity;
      }
    if(RealSize>0)
      {
      NeighborhoodSimilarity /= RealSize;
      }
    *MinDist = DistSeg;
    return NeighborhoodSimilarity;  
  }


  /**
   * Estimate inverse displacements for the model driving points by searching
   * from picked points of the moving image back towards the model.
   *
   * The function
   *  1. resets m_InverseDisplacement and stamps, into a temporary label
   *     image, the ID of every model driving point at the (truncated)
   *     location stored in m_ModelDrivingPointDisplacement;
   *  2. visits an evenly strided subset of m_PickedPointsOnMovingImage (at
   *     most 20000 and at most half the number of model points), and for
   *     each one scans a cubic window of half-width SearchRadius for stamped
   *     model points whose attributes match;
   *  3. for the best match above m_SubvolumnSimilarityThreshold, accumulates
   *     the negated window offset into m_InverseDisplacement of that model
   *     point;
   *  4. divides every accumulated displacement by its hit count.
   *
   * Progress figures are written to std::cout. The index arithmetic
   * addresses components 0..2 explicitly (three-dimensional images).
   */
  template <class TInputImage, class TOutputImage>
  void 
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::FindingInverseForceFromSubject(ImageAttributePointerType &FixedAttributeImage, ImageAttributePointerType &MovingAttributeImage, DeformationFieldPointer &DeformFld, int SearchRadius) const
  {
    typedef itk::Image<int, InputImageDimension> ModelDrivingPointImageType;
    typename ModelDrivingPointImageType::Pointer ModelDrivingPointImage = ModelDrivingPointImageType::New();
    unsigned int PointID;
    IndexType SubjIndex;
    AttributeVectorType TemplateFeature, SubjectFeature;
    DeformationVectorType TentativeWarp, MaxDeform;

    // Stride of the search window scan.
    int incre = 2*SearchRadius/6 ; 
    if(incre<1)
      incre=1 ;  

    RegionType dummyRegion = FixedAttributeImage->GetLargestPossibleRegion();
    ModelDrivingPointImage->SetRegions(dummyRegion);
    ModelDrivingPointImage->Allocate();

    // -1 marks "no model driving point here".
    typedef itk::ImageRegionIterator<ModelDrivingPointImageType> IteratorType;
    IteratorType Itor(ModelDrivingPointImage, dummyRegion);
    for(Itor.GoToBegin();!Itor.IsAtEnd();++Itor)
      {
      Itor.Set(-1);
      }

    // Sub-volume sampling stride; clamp so that a small neighborhood cannot
    // yield 0, which would break SubVolumnMatching.
    int SmplStep = m_SubvolumeNeighborhood.size()/20;
    if(SmplStep<1)
      SmplStep = 1;

    const unsigned int MdlPickedPointNum = m_PickedPointsOnFixedImage.size();
    // Hit counter per model driving point, zero-initialised.
    std::vector<float> Multiple(MdlPickedPointNum, 0.0f);
    for( PointID=0; PointID < MdlPickedPointNum; PointID++)
      {
      for(unsigned int s=0;s<InputImageDimension;s++)
        {
        SubjIndex[s] = (int)(m_ModelDrivingPointDisplacement[PointID][s]);
        m_InverseDisplacement[PointID][s] = 0;
        }
      if(!dummyRegion.IsInside(SubjIndex))
        {
        continue;
        }
      ModelDrivingPointImage->SetPixel(SubjIndex, PointID);
      }

    unsigned int TotalSamples = m_PickedPointsOnFixedImage.size()/2;
    if(TotalSamples>m_PickedPointsOnMovingImage.size()) 
      TotalSamples = m_PickedPointsOnMovingImage.size() ;
    if(TotalSamples>20000)  
      TotalSamples = 20000 ;
    // With fewer than two model points (or no moving points) the count is 0;
    // dividing by it would give an infinite/NaN stride whose conversion to
    // an integer index is undefined.
    if(TotalSamples<1)
      TotalSamples = 1 ;
    const float StepDesign = (float)(m_PickedPointsOnMovingImage.size())/(float)TotalSamples ; 
    const float MinDistThresh = 10000.0;

    int PointMatched = 0;
    PointID = 0;

    for(float l=0; PointID < m_PickedPointsOnMovingImage.size(); l += StepDesign, PointID=(int)l)
      {    
      float MaxSimilarityDegree = 0;
      MaxDeform.Fill(0);
      SubjectFeature = MovingAttributeImage->GetPixel(m_PickedPointsOnMovingImage[PointID]);

      int dfm_s = 0;    
      for(int i=-SearchRadius;i<=SearchRadius;i+=incre)
        {
        for(int j=-SearchRadius;j<=SearchRadius;j+=incre)
          {
          for(int k=-SearchRadius;k<=SearchRadius;k+=incre)
            {
            // Note that j feeds component 0 and i feeds component 1.
            SubjIndex[0] = static_cast<int>(m_PickedPointsOnMovingImage[PointID][0]) + j;
            SubjIndex[1] = static_cast<int>(m_PickedPointsOnMovingImage[PointID][1]) + i;
            SubjIndex[2] = static_cast<int>(m_PickedPointsOnMovingImage[PointID][2]) + k;

            if(!dummyRegion.IsInside(SubjIndex))
              {
              continue;
              }

            const int MdlPickedPointID = ModelDrivingPointImage->GetPixel(SubjIndex);
            if( MdlPickedPointID < 0)
              {
              continue;
              }

            TemplateFeature = m_ModelAttributeVector[MdlPickedPointID];
            // Skip pairs that disagree on the CSF/background attribute.
            if((TemplateFeature.GetCSFBackground()>0&&SubjectFeature.GetCSFBackground()==0) || (TemplateFeature.GetCSFBackground()==0&&SubjectFeature.GetCSFBackground()>0))
              continue;

            const float PointSimilarity = SimilarityBetweenTwoImgAttribute(TemplateFeature, SubjectFeature, false);
            if(!(PointSimilarity>m_PointMatchingThreshold))
              {
              continue;
              }

            // The warp is expressed from the model point towards the
            // moving point, hence the sign flip.
            TentativeWarp[0] = -j;
            TentativeWarp[1] = -i;
            TentativeWarp[2] = -k;
            typename InputImageType::IndexType MdlIndex = m_PickedPointsOnFixedImage[MdlPickedPointID];
            float MinDistance = 0;
            const float SubvolumnSimilarity = SubVolumnMatching(FixedAttributeImage, MovingAttributeImage, DeformFld, MdlIndex, TentativeWarp, m_SubvolumeNeighborhood, SmplStep, &MinDistance, MinDistThresh);

            if(SubvolumnSimilarity>MaxSimilarityDegree)
              {
              MaxSimilarityDegree = SubvolumnSimilarity;
              for(unsigned int s=0;s<InputImageDimension;s++)
                {
                MaxDeform[s] = TentativeWarp[s];
                }
              dfm_s = MdlPickedPointID;
              }
            }
          }
        }

      if(MaxSimilarityDegree>m_SubvolumnSimilarityThreshold)
        {        
        PointMatched++;
        for(unsigned int s=0;s<InputImageDimension;s++)
          m_InverseDisplacement[dfm_s][s] += MaxDeform[s];
        Multiple[dfm_s]+=1.0;
        }
      }
    std::cout<<PointMatched<<" Matched."<<std::endl;

    // Turn the accumulated sums into means.
    int PickupNum = 0;
    for(PointID=0;PointID<MdlPickedPointNum;PointID++)
      {
      if(Multiple[PointID]>0)
        {
        for(unsigned int s=0;s<InputImageDimension;s++)
          {
          m_InverseDisplacement[PointID][s] /= Multiple[PointID];
          }    
        PickupNum++;
        }    
      }
    std::cout<<"Inverse force ratio = "<<PickupNum<<"/"<<m_InverseDisplacement.size()<<std::endl;
  }

  /**
   * Spread the displacement found for one driving voxel over the voxels of
   * its neighborhood, writing the result back into DeformFld.
   *
   * For each of the first NeighborhoodSize offsets in CertainNeighborhood
   * that falls inside the field, the voxel's deformation is incremented by
   *   ( (mean of surrounding deformations - own deformation) * m_SmoothFactor
   *     + TentativeWarp * weight ) * ( m_DeformRate + 0.005*m_IterationRatio )
   * where weight is read from m_GuassianAtLevelSigma[GaussianSigma] at the
   * voxel's mean absolute offset, and the surrounding deformations are
   * those at offsets m_SubvolumeNeighborhood[1..26].
   *
   * The field is updated in place, so later voxels see earlier updates.
   */
  template <class TInputImage, class TOutputImage>
  void 
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::DisseminateDeformation(DeformationFieldPointer &DeformFld, const int &DrivingPointIndex, DeformationVectorType TentativeWarp, IndexArrayType CertainNeighborhood, int NeighborhoodSize, int GaussianSigma)
  {
    RegionType fieldRegion = DeformFld->GetLargestPossibleRegion();
    const float smoothGain = m_SmoothFactor; 
    const float rateBoost = 0.005*m_IterationRatio ; /* June 6, 2001*/
    IndexType driverIndex = m_PickedPointsOnFixedImage[DrivingPointIndex];

    for(int member=0; member<NeighborhoodSize; member++)
      {
      // Locate the voxel and measure its mean absolute offset from the driver.
      IndexType targetIndex;
      float meanOffset = 0;
      for(int s=0;s<InputImageDimension;s++)
        {
        targetIndex[s] = driverIndex[s] + CertainNeighborhood[member][s];
        meanOffset += abs(CertainNeighborhood[member][s]);
        }
      meanOffset /= InputImageDimension;
      if(!fieldRegion.IsInside(targetIndex))
        {
        continue;
        }

      const float gaussWeight = m_GuassianAtLevelSigma[GaussianSigma][(int)meanOffset];

      // Average the deformation over the surrounding voxels that exist.
      DeformationVectorType surroundMean;
      for(int s=0;s<InputImageDimension;s++)
        {
        surroundMean[s] = 0;
        }
      float surroundCount = 0;
      for(int n=1;n<27;n++) //need to find better way 
        {
        IndexType surroundIndex;
        for(int s=0;s<InputImageDimension;s++)
          {
          surroundIndex[s] = targetIndex[s] + m_SubvolumeNeighborhood[n][s];
          }
        if(fieldRegion.IsInside(surroundIndex))
          {
          DeformationVectorType surroundDeform = DeformFld->GetPixel(surroundIndex);
          for(int s=0;s<InputImageDimension;s++)
            {
            surroundMean[s] += surroundDeform[s];
            }
          surroundCount += 1.0;
          }
        }
      if(surroundCount>0)
        {
        for(int s=0;s<InputImageDimension;s++)
          {
          surroundMean[s] /= surroundCount;
          }
        }

      // Blend the smoothing pull with the weighted warp and apply it.
      DeformationVectorType targetDeform = DeformFld->GetPixel(targetIndex);
      DeformationVectorType increment;
      for(int s=0;s<InputImageDimension;s++)
        {
        surroundMean[s] = (surroundMean[s]-targetDeform[s])*smoothGain;
        increment[s] = (surroundMean[s]+TentativeWarp[s]*gaussWeight)*(m_DeformRate+rateBoost);
        targetDeform[s] += increment[s];
        }
      DeformFld->SetPixel(targetIndex, targetDeform);
      }
  }

  /**
   * Fit a global affine transform, in the least-squares sense, that maps
   * the previous driving-point positions onto the current ones, and store
   * it in GlobalAffineMatrix.
   *
   * With M the 4xN homogeneous matrix built from
   * PreviousDeformationOnDrivingPoint and O the one built from
   * CurrentDeformationOnDrivingPoint, the result is
   *   GlobalAffineMatrix = (O * M^T) * inverse(M * M^T),
   * the inverse being obtained through an SVD. Rows 0 and 1 of both
   * matrices hold components 1 and 0 respectively (swapped axes).
   *
   * PreviousDeformationOnDrivingPoint is indexed up to
   * CurrentDeformationOnDrivingPoint.size(), so it must hold at least that
   * many entries. The fitted matrix is printed to vcl_cerr.
   */
  template <class TInputImage, class TOutputImage>
  void 
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::FitGlobalAffineTransform(DeforamtionVectorArrayType CurrentDeformationOnDrivingPoint, 
			     DeforamtionVectorArrayType PreviousDeformationOnDrivingPoint)
  {
    const unsigned int PointCount = CurrentDeformationOnDrivingPoint.size();

    /* create matrices */
    vnl_matrix<double> ModelMatrix, ObjectMatrix;
    ModelMatrix.set_size(4, PointCount);
    ObjectMatrix.set_size(4, PointCount);

    /* fill ModelMatrix (previous positions) and ObjectMatrix (current
       positions) column by column, in homogeneous coordinates */
    for(unsigned int j=0; j<PointCount; j++)
      {
      ModelMatrix[0][j] = PreviousDeformationOnDrivingPoint[j][1] ;
      ModelMatrix[1][j] = PreviousDeformationOnDrivingPoint[j][0] ;
      ModelMatrix[2][j] = PreviousDeformationOnDrivingPoint[j][2] ;
      ModelMatrix[3][j] = 1 ;

      ObjectMatrix[0][j] = CurrentDeformationOnDrivingPoint[j][1] ;
      ObjectMatrix[1][j] = CurrentDeformationOnDrivingPoint[j][0] ;
      ObjectMatrix[2][j] = CurrentDeformationOnDrivingPoint[j][2] ;
      ObjectMatrix[3][j] = 1 ;
      }

    /* the transpose is needed by both 4x4 products, so build it once */
    const vnl_matrix<double> ModelTransposed = ModelMatrix.transpose();

    /* get matrix 4X4 TmpM1 */
    vnl_matrix<double> TmpM1 = ModelMatrix*ModelTransposed;

    /* get matrix 4X4 TmpM2 */
    vnl_matrix<double> TmpM2 = ObjectMatrix*ModelTransposed;

    vnl_svd<double> svd(TmpM1);
    vnl_matrix<double> InverseMatrix = svd.inverse();

    GlobalAffineMatrix = TmpM2*InverseMatrix;    
    vcl_cerr<<GlobalAffineMatrix;
    std::cout<<std::endl;
  }


  /**
   * Blend the deformation field with the global affine estimate and then
   * run the smoothing passes on it.
   *
   * For every voxel, the affine prediction ("Global") is computed by
   * applying GlobalAffineMatrix to the voxel's position under DeformFld_Last
   * (with components 0 and 1 swapped, matching FitGlobalAffineTransform).
   * The value in DeformFld is then replaced by
   *   Global + (value - Global) * LocalRatio / 1.2
   * and clamped so that index + deformation stays within [0, size-1], where
   * size is the size of DeformFld's requested region.
   *
   * Afterwards, when m_Intensitymode is 0, SmoothDeformation_OneTime and
   * then EdgePreserveSmoothDeformation_OneTime are each run m_SmoothTime
   * times with a Time argument counting down from m_SmoothTime to 1;
   * otherwise the "_4I" variants are used.
   */
  template <class TInputImage, class TOutputImage>
  void 
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::SmoothDeformationField(ImageAttributePointerType &FixedAttributeImage, DeformationFieldPointer DeformFld, DeformationFieldPointer DeformFld_Last, float LocalRatio)
  {
    SizeType extent = DeformFld->GetRequestedRegion().GetSize();
    ConstDeforamtionFieldIteratorType previousIt(DeformFld_Last, DeformFld_Last->GetLargestPossibleRegion());
    DeformationFieldIteratorType currentIt(DeformFld, DeformFld->GetLargestPossibleRegion());
    DeformationVectorType Global, blended, warpedPosition;  

    previousIt.GoToBegin();
    currentIt.GoToBegin();
    while(!currentIt.IsAtEnd())
      {
      IndexType voxel = previousIt.GetIndex();
      warpedPosition = previousIt.Get();
      blended = currentIt.Get();

      // Position of this voxel under the previous deformation.
      for(int k=0;k<OutputImageDimension;k++)
        {
        warpedPosition[k] += voxel[k];
        }

      // Affine prediction, expressed as a displacement from the voxel.
      Global[1] = GlobalAffineMatrix[0][0]*warpedPosition[1] + GlobalAffineMatrix[0][1]*warpedPosition[0] + GlobalAffineMatrix[0][2]*warpedPosition[2] + GlobalAffineMatrix[0][3] - voxel[1];
      Global[0] = GlobalAffineMatrix[1][0]*warpedPosition[1] + GlobalAffineMatrix[1][1]*warpedPosition[0] + GlobalAffineMatrix[1][2]*warpedPosition[2] + GlobalAffineMatrix[1][3] - voxel[0];
      Global[2] = GlobalAffineMatrix[2][0]*warpedPosition[1] + GlobalAffineMatrix[2][1]*warpedPosition[0] + GlobalAffineMatrix[2][2]*warpedPosition[2] + GlobalAffineMatrix[2][3] - voxel[2];

      for(int k=0;k<OutputImageDimension;k++)  
        {
        blended[k] = Global[k] + (blended[k]-Global[k])*LocalRatio/1.2;
        // Keep the warped location inside the image extent.
        if(blended[k]+voxel[k]<0)
          {
          blended[k] = -voxel[k];
          }
        if(blended[k]+voxel[k]>=extent[k]-1)
          {
          blended[k] = extent[k]-1-voxel[k];
          }
        }
      currentIt.Set(blended);

      ++currentIt;
      ++previousIt;
      }

    // Smoothing
    if(m_Intensitymode != 0)
      {
      SmoothDeformation_OneTime_4I(DeformFld);
      if(m_SmoothTime>0)
        {
        EdgePreserveSmoothDeformation_OneTime_4I(FixedAttributeImage, DeformFld);
        }
      return;
      }

    for(int pass=0; pass<m_SmoothTime; pass++)
      {
      SmoothDeformation_OneTime(FixedAttributeImage, DeformFld, m_SmoothTime-pass);
      }
    for(int pass=0; pass<m_SmoothTime; pass++)
      {
      EdgePreserveSmoothDeformation_OneTime(FixedAttributeImage, DeformFld, m_SmoothTime-pass);
      }
  }


  /**
   * Set the standard deviations.
   *
   * Assigns value to every one of the InputImageDimension components of
   * m_StandardDeviations. Modified() is called, and the assignment made,
   * only when at least one component currently differs from value.
   */
  template <class TInputImage, class TOutputImage>
  void
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::SetStandardDeviations(
			  double value )
  {

    // Look for the first component that differs from the requested value.
    // The bound is the image dimension, matching the assignment loop below,
    // rather than a hard-coded 3.
    unsigned int j;
    for( j = 0; j < InputImageDimension; j++ )
      {
      if( value != m_StandardDeviations[j] )
        {
        break;
        }
      }
    if( j < InputImageDimension )
      {
      this->Modified();
      for( j = 0; j < InputImageDimension; j++ )
        {
        m_StandardDeviations[j] = value;
        }
      }

  }

  /**
   * One in-place smoothing pass over the deformation field.
   *
   * Each voxel is pulled towards the mean deformation of its surrounding
   * voxels (offsets m_SubvolumeNeighborhood[1..26] that lie inside the
   * field) by the factor m_DfmSmthCoeff. When Time > 1 and the template
   * voxel's edge attribute equals m_GMVNEDGE or m_WMVNEDGE, the pull is
   * additionally limited to a magnitude of 3.0. The result is clamped so
   * that index + deformation stays within [0, size-1], where size is the
   * size of DeformFld's requested region.
   *
   * The field is read and written through the same image, so voxels
   * visited later see already-smoothed values.
   */
  template <class TInputImage, class TOutputImage>
  void
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::SmoothDeformation_OneTime(ImageAttributePointerType &FixedAttributeImage, DeformationFieldPointer DeformFld, int Time)
  {
    SizeType extent = DeformFld->GetRequestedRegion().GetSize();
    RegionType fieldRegion = DeformFld->GetLargestPossibleRegion();
    DeformationFieldIteratorType fieldIt(DeformFld, DeformFld->GetLargestPossibleRegion());
    AttributeVectorType TemplateFeature;
    const float LocalRatio = m_DfmSmthCoeff;

    fieldIt.GoToBegin();
    while(!fieldIt.IsAtEnd())
      {
      IndexType centerIndex = fieldIt.GetIndex();
      DeformationVectorType centerDeform = fieldIt.Get();
      TemplateFeature = FixedAttributeImage->GetPixel(centerIndex);

      // Mean deformation of the surrounding voxels that exist.
      DeformationVectorType surroundMean;
      for(int s=0;s<InputImageDimension;s++)
        {
        surroundMean[s] = 0;
        }
      float surroundCount = 0;
      for(int k=1;k<27;k++)
        {
        IndexType surroundIndex;
        for(int s=0;s<InputImageDimension;s++)
          {
          surroundIndex[s] = centerIndex[s] + m_SubvolumeNeighborhood[k][s];
          }
        if(fieldRegion.IsInside(surroundIndex))
          {
          DeformationVectorType surroundDeform = DeformFld->GetPixel(surroundIndex);
          for(int s=0;s<InputImageDimension;s++)
            {
            surroundMean[s] += surroundDeform[s];
            }
          surroundCount += 1;
          }
        }
      for(int s=0;s<InputImageDimension;s++)
        {
        surroundMean[s] /= surroundCount;
        }

      const bool limitedPull = Time>1 && (TemplateFeature.GetEdge()==m_GMVNEDGE || TemplateFeature.GetEdge()==m_WMVNEDGE);
      if(limitedPull)
        {
        // Cap the length of the smoothing step.
        DeformationVectorType pull;
        for(int s=0;s<InputImageDimension;s++)
          {
          pull[s] = (surroundMean[s]-centerDeform[s])*LocalRatio;
          }
        float mag = ComputeVectorMagnitude(pull);
        if( mag>3.0 ) /* 2.0 before June 22 2004 */
          {
          for(int s=0;s<InputImageDimension;s++)
            {
            pull[s] /= mag/3.0;
            }
          }
        for(int s=0;s<InputImageDimension;s++)
          {
          centerDeform[s] += pull[s];
          }
        }
      else
        {
        for(int s=0;s<InputImageDimension;s++)
          {
          centerDeform[s] += (surroundMean[s]-centerDeform[s])*LocalRatio;
          }
        }

      // Keep the warped location inside the image extent.
      for(int s=0;s<InputImageDimension;s++)
        {
        if(centerDeform[s]+centerIndex[s]<0)
          {
          centerDeform[s] = - centerIndex[s];
          }
        if(centerDeform[s]+centerIndex[s]>=extent[s]-1)
          {
          centerDeform[s] = extent[s]-1-centerIndex[s];
          }
        }
      fieldIt.Set(centerDeform);
      ++fieldIt;
      }
  }

  /**
   * One in-place, edge-preserving smoothing pass over the deformation field.
   *
   * Each voxel is pulled towards the mean deformation of its surrounding
   * voxels (offsets m_SubvolumeNeighborhood[1..26] that lie inside the
   * fixed attribute image's region). The pull factor is m_DfmSmthCoeff, or
   * m_DfmSmthCoeff/100 when Time > 1 and the template voxel's edge
   * attribute equals m_GMVNEDGE or m_WMVNEDGE. The result is clamped so
   * that index + deformation stays within [0, size-1], where size is the
   * size of DeformFld's requested region.
   *
   * The field is read and written through the same image, so voxels
   * visited later see already-smoothed values.
   */
  template <class TInputImage, class TOutputImage>
  void
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::EdgePreserveSmoothDeformation_OneTime(ImageAttributePointerType &FixedAttributeImage, DeformationFieldPointer DeformFld, int Time)
  {
    SizeType extent = DeformFld->GetRequestedRegion().GetSize();
    RegionType attributeRegion = FixedAttributeImage->GetLargestPossibleRegion();
    DeformationFieldIteratorType fieldIt(DeformFld, DeformFld->GetLargestPossibleRegion());
    AttributeVectorType TemplateFeature;
    const float LocalRatio = m_DfmSmthCoeff;

    fieldIt.GoToBegin();
    while(!fieldIt.IsAtEnd())
      {
      IndexType centerIndex = fieldIt.GetIndex();
      DeformationVectorType centerDeform = fieldIt.Get();
      TemplateFeature = FixedAttributeImage->GetPixel(centerIndex);

      DeformationVectorType surroundMean;
      for(int s=0;s<InputImageDimension;s++)
        {
        surroundMean[s] = 0;
        }
      float surroundCount = 0;
      for(int k=1;k<27;k++)
        {
        IndexType surroundIndex;
        for(int s=0;s<InputImageDimension;s++)
          {
          surroundIndex[s] = centerIndex[s] + m_SubvolumeNeighborhood[k][s];
          }
        if(!attributeRegion.IsInside(surroundIndex))
          {
          continue;
          }

        DeformationVectorType surroundDeform = DeformFld->GetPixel(surroundIndex);
        for(int s=0;s<InputImageDimension;s++)
          {
          surroundMean[s] += surroundDeform[s];
          }
        surroundCount += 1;

        // Count the neighbor a second time.
        // NOTE(review): this requires the edge attribute to equal BOTH
        // m_GMVNEDGE and m_WMVNEDGE, which can only hold if the two
        // constants are equal, so the branch looks unreachable. The intended
        // test may have been '||', or may have been meant for the neighbor's
        // attribute rather than the centre's. Confirm before changing.
        if(Time>1 && TemplateFeature.GetEdge()==m_GMVNEDGE && TemplateFeature.GetEdge()==m_WMVNEDGE ) 
          {
          for(int s=0;s<InputImageDimension;s++)
            {
            surroundMean[s] += surroundDeform[s];
            }
          surroundCount += 1;
          }
        }
      for(int s=0;s<InputImageDimension;s++)
        {
        surroundMean[s] /= surroundCount;
        }

      const bool onProtectedEdge = Time>1 && (TemplateFeature.GetEdge()==m_GMVNEDGE || TemplateFeature.GetEdge()==m_WMVNEDGE);
      if(onProtectedEdge)
        {
        // Protected edge voxels move a hundred times less.
        for(int s=0;s<InputImageDimension;s++)
          {
          centerDeform[s] += (surroundMean[s]-centerDeform[s])*LocalRatio/100.0;
          }
        }
      else
        {
        for(int s=0;s<InputImageDimension;s++)
          {
          centerDeform[s] += (surroundMean[s]-centerDeform[s])*LocalRatio;
          }
        }

      // Keep the warped location inside the image extent.
      for(int s=0;s<InputImageDimension;s++)
        {
        if(centerDeform[s]+centerIndex[s]<0)
          {
          centerDeform[s] = - centerIndex[s];
          }
        if(centerDeform[s]+centerIndex[s]>=extent[s]-1)
          {
          centerDeform[s] = extent[s]-1-centerIndex[s];
          }
        }
      fieldIt.Set(centerDeform);
      ++fieldIt;
      }
  }

  template <class TInputImage, class TOutputImage>
  void
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::HAMMERRegistrationOneRound(ImageAttributePointerType &FixedAttributeImage, ImageAttributePointerType &MovingAttributeImage, int resolution, float ratio_iteration, float LocalRatio, DeformationFieldPointer DeformFld)
  {
    int vv;
    unsigned int s,t;
    DeformationVectorType dfm, dfm_prev;
    float MaxDegree;
    IndexType idx;
    DeforamtionVectorArrayType m_LastDeformationList;
    RegionType dummyRegion;
    int Image_Z_Size, Image_X_Size, Image_Y_Size;
    //Step1: copy the previous deformation and previous displacement on driving voxel
    DeformationFieldPointer DeformFieldLast = DeformationFieldType::New();
    // copy field to m_DeformationFieldLast
    DeformFieldLast->CopyInformation( DeformFld );
    DeformFieldLast->SetRegions( DeformFld->GetLargestPossibleRegion() );
    DeformFieldLast->Allocate();

    dummyRegion = FixedAttributeImage->GetLargestPossibleRegion();
    Image_Z_Size = dummyRegion.GetSize()[2];
    Image_X_Size = dummyRegion.GetSize()[1];
    Image_Y_Size = dummyRegion.GetSize()[0];

    DeformationFieldIteratorType DeformFldLast_Iter(DeformFieldLast, DeformFieldLast->GetRequestedRegion());
    DeformationFieldIteratorType DeformFld_Iter(DeformFld, DeformFld->GetRequestedRegion());
    for(DeformFldLast_Iter.GoToBegin(),DeformFld_Iter.GoToBegin();!DeformFld_Iter.IsAtEnd();++DeformFld_Iter, ++DeformFldLast_Iter)
      {
	DeformFldLast_Iter.Set(DeformFld_Iter.Get());
      }
    //copy the vector

    m_LastDeformationList.resize(m_ModelDrivingPointDisplacement.size());
    for(unsigned int k=0;k<m_ModelDrivingPointDisplacement.size();k++)
      {
	for(int s=0;s<InputImageDimension;s++)
	  m_LastDeformationList[k][s] = m_ModelDrivingPointDisplacement[k][s];
      }

    //Step2: Initilzation before the feature matching 
    int SHIFT = resolution/2 ; 
    if(SHIFT<1)
      SHIFT=1; 

    int incre = 2*SHIFT/6; 
    if(incre<1) 
      incre=1 ;

    m_SubvolumeRadius = (SHIFT+1)*2+1; /*nb_size should be odd number */ 

    CreatePointMatchingNeighbor(m_SearchNeighbor, SHIFT);
    CreateSubvolumnNeighbor(m_SubvolumeNeighborhood, m_SubvolumeRadius);

    int Subvolumn_Matching_Size = m_SubvolumeNeighborhood.size();
    printf("Subvolume radius is %d    subvolume size is %d\n", m_SubvolumeRadius, Subvolumn_Matching_Size);

    //Step2.1 get the forces from the landmarks 
    if(ratio_iteration<=ITER_THRD) 
      {
	if(m_Softmode == 1)
	  SoftFindingInverseForceFromSubject(FixedAttributeImage, MovingAttributeImage, DeformFld, SHIFT+1);
	else
	  FindingInverseForceFromSubject(FixedAttributeImage, MovingAttributeImage, DeformFld, SHIFT+1);
      }
    //std::cout << "Step 3 Start the correspondence matching\n";
    //Step 3 Start the correspondence matching 
    int MdlVer_Num = m_PickedPointsOnFixedImage.size();
    for(vv=0; vv<MdlVer_Num; vv+=STEPPNT) 
      {
	int Template_Driving_Point_Index = (vv + m_StartSearchPoint)%MdlVer_Num ; 

	if(m_Softmode==1)
	  MaxDegree = SoftDetermineCorrespondenceOnOneDrivingVoxel(FixedAttributeImage, MovingAttributeImage, DeformFld, Template_Driving_Point_Index, DeformFieldLast, dfm, SHIFT, incre);
	else
	  MaxDegree = DetermineCorrespondenceOnOneDrivingVoxel(FixedAttributeImage, MovingAttributeImage, DeformFld, Template_Driving_Point_Index, DeformFieldLast, dfm, SHIFT, incre);
			
	//std::cout<<Template_Driving_Point_Index<<":"<<MaxDegree<<"-->"<<dfm<<std::endl;  
	if( MaxDegree>m_SubvolumnSimilarityThreshold)
	  {          
	    int UsedVNum = Subvolumn_Matching_Size;
	    int UsedGuassianSigma = m_SubvolumeRadius ;

	    DisseminateDeformation(DeformFld, Template_Driving_Point_Index, dfm, m_SubvolumeNeighborhood, UsedVNum, UsedGuassianSigma);      
	  }//end of else

      }//end of vv loop  

    m_StartSearchPoint = vv ;

    //Smoothing
    //std::cout<<"update MdlVer"<<std::endl;
    if(m_TPSmode==0)
      {
	for(unsigned int k=0;k<m_PickedPointsOnFixedImage.size();k++)
	  {
	    idx = m_PickedPointsOnFixedImage[k];
	    dfm = DeformFld->GetPixel(idx);
	    for(int j=0;j<InputImageDimension;j++)
	      m_ModelDrivingPointDisplacement[k][j] = dfm[j]+idx[j];
	    //std::cout<<k<<":"<<dfm<<std::endl;
	  }
	FitGlobalAffineTransform(m_ModelDrivingPointDisplacement, m_LastDeformationList);
	SmoothDeformationField(FixedAttributeImage, DeformFld, DeformFieldLast, LocalRatio);
      }
    else
      {
	//TPS mode
	DeformationFieldPointer Deform_TPS = DeformationFieldType::New();
	InputImagePointer TPS_MaskImg = InputImageType::New();
	InputImagePointer TPS_OutputImage = InputImageType::New();
	DeformationFieldPointer Deform_Increment = DeformationFieldType::New();
	TPS_MaskImg->CopyInformation(FixedAttributeImage);
	TPS_MaskImg->SetRegions(FixedAttributeImage->GetLargestPossibleRegion());
	TPS_MaskImg->Allocate();
	TPS_OutputImage->CopyInformation(FixedAttributeImage);
	TPS_OutputImage->SetRegions(FixedAttributeImage->GetLargestPossibleRegion());
	TPS_OutputImage->Allocate();
	DeformFieldLast->CopyInformation( DeformFld );
	DeformFieldLast->SetRegions( DeformFld->GetLargestPossibleRegion() );
	DeformFieldLast->Allocate();
	Deform_Increment->CopyInformation( DeformFld );
	Deform_Increment->SetRegions( DeformFld->GetLargestPossibleRegion() );
	Deform_Increment->Allocate();

	for(t=0;t<m_PickedPointsOnFixedImage.size();t++)
	  {
	    for(s=0;s<InputImageDimension;s++)
	      idx[s] = m_PickedPointsOnFixedImage[t][s];
	    TPS_MaskImg->SetPixel(idx, 1);
	  }
	for(t=0;t<m_PickedPointsOnFixedImage.size();t++)
	  {
	    idx = m_PickedPointsOnFixedImage[t];
	    dfm = DeformFld->GetPixel(idx);
	    for(int j=0;j<InputImageDimension;j++)
	      m_ModelDrivingPointDisplacement[t][j] = dfm[j];
	    //std::cout<<k<<":"<<dfm<<std::endl;
	  }
	for(t=0;t<static_cast<unsigned int> (ControlPointNum);t++)
	  {
	    for(s=0;s<InputImageDimension;s++)
	      idx[s] = ControlPointList[t][s];
	    dfm_prev = DeformFieldLast->GetPixel(idx);
	    dfm = DeformFld->GetPixel(idx);
	    for(s=0;s<InputImageDimension;s++)
	      ControlPointDeform[t][s] = dfm[s] - dfm_prev[s];
	  }
	GenerateDeformationbyTPS_Bookstein(FixedAttributeImage, DeformFieldLast, Deform_Increment, ControlPointDeform, ControlPointList, ControlPointNum,\
					   Image_X_Size, Image_Y_Size, Image_Z_Size, TPS_MaskImg, TPS_OutputImage);
	InputImageIteratorType TPSMaskIter = InputImageIteratorType(TPS_MaskImg, TPS_MaskImg->GetLargestPossibleRegion());
	InputImageIteratorType TPSOutputIter = InputImageIteratorType(TPS_OutputImage, TPS_OutputImage->GetLargestPossibleRegion());
	DeformationFieldIteratorType OutputDeformFldIter = DeformationFieldIteratorType(DeformFld, DeformFld->GetLargestPossibleRegion());
	DeformationFieldIteratorType IncreDeformFldIter = DeformationFieldIteratorType(Deform_Increment, Deform_Increment->GetLargestPossibleRegion());
	DeformationFieldIteratorType LastDeformFldIter = DeformationFieldIteratorType(DeformFieldLast, DeformFieldLast->GetLargestPossibleRegion());
	for(TPSMaskIter.GoToBegin(),TPSOutputIter.GoToBegin(),OutputDeformFldIter.GoToBegin(),IncreDeformFldIter.GoToBegin(),LastDeformFldIter.GoToBegin();!OutputDeformFldIter.IsAtEnd();\
	    ++TPSMaskIter,++TPSOutputIter,++OutputDeformFldIter,++IncreDeformFldIter,++LastDeformFldIter)
	  {
	    if(TPSMaskIter.Get()==0 && TPSOutputIter.Get()==1)
	      {
		OutputDeformFldIter.Set(IncreDeformFldIter.Get()+LastDeformFldIter.Get());
	      }
	  }
      }
  }

  /**
   * Iterate the HAMMER registration m_NumberOfIterations times.
   *
   * Each pass
   *   1. re-selects the fixed-image driving voxels once the iteration ratio
   *      left over from the previous pass exceeds 0.5,
   *   2. updates the iteration ratio, the search resolution and the point
   *      matching thresholds,
   *   3. calls HAMMERRegistrationOneRound() (plus extra smoothing in soft
   *      mode),
   *   4. refreshes m_ModelDrivingPointDisplacement from the deformation field,
   *   5. reports timing and progress according to m_CurrentResolution.
   *
   * \param FixedAttributeImage   attribute-vector image of the fixed image
   * \param MovingAttributeImage  attribute-vector image of the moving image
   * \param DeformFld             deformation field, updated in place
   * \param progress              progress reporter ticked once per pass
   *                              (1, 8 or 32 ticks for resolution 0, 1, 2)
   */
  template <class TInputImage, class TOutputImage>
  void 
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::HAMMERMainLoop(ImageAttributePointerType &FixedAttributeImage, 
                   ImageAttributePointerType &MovingAttributeImage, 
                   DeformationFieldPointer &DeformFld,
                   itk::ProgressReporter & progress)
  {
    m_IterationRatio = 0;

    for(unsigned int iter=0; iter<m_NumberOfIterations; iter++)
      {
        clock_t tic = clock();

        // At this point m_IterationRatio still holds the value computed in
        // the previous pass; it is refreshed right after this block.
        if(m_IterationRatio>0.5)
          {
            if(m_Intensitymode != 0)
              {
                // Intensity mode: search thresholds that select a target
                // percentage of driving voxels, capped at the allowed maximum.
                float CurrentRequirePercent = m_StartPercent+(1-m_StartPercent)*pow((double)m_IterationRatio, 2.0) ;  /* 2.0 */
                if( CurrentRequirePercent>m_MaxPercentOfAllowedDrivingVoxels )
                  {
                    CurrentRequirePercent = m_MaxPercentOfAllowedDrivingVoxels ;
                  }
                m_FixedImageDrivingVoxelQualification[2] = m_MdlCannyMinRecord*(1.-pow((double)m_IterationRatio, 16.0)) ;
                SearchThresholdForObtainingTopXPercentOfDrivingVoxels(FixedAttributeImage, m_FixedImageDrivingVoxelQualification, CurrentRequirePercent);
                printf("CurrentRequirePercent=%f\n", CurrentRequirePercent) ;
                printf("MdlThreshold.Variance_UP=%f, MdlThreshold.CannyEdge_UP=%f\n", m_FixedImageDrivingVoxelQualification[0], m_FixedImageDrivingVoxelQualification[1]) ;
              }
            else
              {
                // Non-intensity mode: the six qualification values are
                // closed-form functions of the iteration ratio.
                m_FixedImageDrivingVoxelQualification[0] = (0.9*(1-m_IterationRatio)*255);
                m_FixedImageDrivingVoxelQualification[1] = (0.4*(2.0*m_IterationRatio)*255);
                m_FixedImageDrivingVoxelQualification[2] = (m_FixedImageDrivingVoxelQualification[1]/1.1);
                m_FixedImageDrivingVoxelQualification[3] = (255/4);

                float TempVNvlm_DN;
                if(m_Softmode ==1)
                  {
                    TempVNvlm_DN = 250+(255-250)*m_IterationRatio*1.1 ; /* 127: ~65% added on Dec 2006 */
                  }
                else
                  {
                    TempVNvlm_DN = 170+(255-170)*m_IterationRatio*1.5 ;
                  }
                // Saturate at 255.
                m_FixedImageDrivingVoxelQualification[4] = (TempVNvlm_DN>255) ? 255 : TempVNvlm_DN ;
                m_FixedImageDrivingVoxelQualification[5] = (255/4);
              }

            IdentifyDrivingVoxelsInFixedImage(FixedAttributeImage, m_FixedImageDrivingVoxelQualification);

            // Rebuild the per-driving-point bookkeeping for the new point set.
            const unsigned int pickedCount = m_PickedPointsOnFixedImage.size();
            m_InverseDisplacement.clear();
            m_ModelDrivingPointDisplacement.clear();
            m_ModelAttributeVector.clear();
            m_InverseDisplacement.resize(pickedCount);
            m_ModelDrivingPointDisplacement.resize(pickedCount);
            for(unsigned int p=0; p<pickedCount; p++)
              {
                IndexType voxel = m_PickedPointsOnFixedImage[p];
                DeformationVectorType shift = DeformFld->GetPixel(voxel);
                for(int d=0; d<InputImageDimension; d++)
                  {
                    m_ModelDrivingPointDisplacement[p][d] = voxel[d] + shift[d];
                    m_InverseDisplacement[p][d] = 0;
                  }
                m_ModelAttributeVector.push_back(FixedAttributeImage->GetPixel(voxel));
              }
          }

        m_IterationRatio = (float)iter/(float)(m_NumberOfIterations) ;

        // Search resolution shrinks with the iteration ratio, never below 1.
        int resolution = static_cast<int>( m_SearchRadius*exp(-m_IterationRatio*m_IterationRatio/2./0.36) );
        if(resolution<1)
          {
            resolution = 1 ;
          }

        float LocalRatio = 0.25 + 0.65*exp(-(m_IterationRatio-1.)*(m_IterationRatio-1.)/2./.25/.25) ;

        if(m_Intensitymode != 0)
          {
            m_PointMatchingThreshold = m_PointMatching_Initial*(1-m_IterationRatio) + 0.2 ; /* 0.8 */ 
            m_DIF_THR = (1.-m_PointMatchingThreshold)*255.0 ;
          }
        else
          {
            m_PointMatchingThreshold = m_PointMatching_Initial*(1-m_IterationRatio)+0.001 ; /* 0.8 */ 
            m_DIF_THR = 54;
          }

        // Intensity mode, first half of the iterations: fixed subvolume
        // radius of 3 and one extra smoothing pass before the round.
        if(m_Intensitymode == 1 && m_IterationRatio<0.5)
          {
            m_SubvolumeRadius = 3;
            CreateSubvolumnNeighbor(m_SubvolumeNeighborhood, m_SubvolumeRadius);
            SmoothDeformation_OneTime_4I(DeformFld);
          }

        // One round of correspondence matching + smoothing.
        HAMMERRegistrationOneRound(FixedAttributeImage, MovingAttributeImage, resolution, m_IterationRatio, LocalRatio, DeformFld);

        // Soft mode applies additional smoothing passes after each round.
        if(m_Softmode == 1)
          {
            int XYZres = 1<<(2-m_CurrentResolution);
            int hierarchicalSmth = (int) ((1 - m_IterationRatio/2)*XYZres*m_AdditionalSmth + 0.5) ;
            for(int pass=0; pass<hierarchicalSmth; pass++)
              {
                SmoothDeformation_OneTime(FixedAttributeImage, DeformFld, m_SmoothTime);
              }
          }

        // Store index + deformation for every driving voxel.
        for(unsigned int p=0; p<m_PickedPointsOnFixedImage.size(); p++)
          {
            IndexType voxel = m_PickedPointsOnFixedImage[p];
            DeformationVectorType shift = DeformFld->GetPixel(voxel);
            for(int d=0; d<InputImageDimension; d++)
              {
                m_ModelDrivingPointDisplacement[p][d] = voxel[d] + shift[d];
              }
          }

        clock_t toc = clock();
        double sec = static_cast<double>(toc-tic)/static_cast<double>(CLOCKS_PER_SEC);

        // Number of progress ticks and the log label depend on the level;
        // other values of m_CurrentResolution report nothing.
        unsigned int progressTicks = 0;
        const char * levelLabel = 0;
        if (m_CurrentResolution == 0)
          {
            progressTicks = 1;
            levelLabel = "Low Res: ITER ";
          }
        else if (m_CurrentResolution == 1)
          {
            progressTicks = 8;
            levelLabel = "Mid Res: ITER ";
          }
        else if (m_CurrentResolution == 2)
          {
            progressTicks = 32;
            levelLabel = "High Res: ITER ";
          }

        if (levelLabel != 0)
          {
            for (unsigned int tick = 0; tick < progressTicks; tick++)
              {
                progress.CompletedPixel() ;
              }
            std::cout<<levelLabel<< iter << " time: " << sec << " seconds\n";
          }
      }
  }



  /**
   * Select the driving voxels of the fixed image into
   * m_PickedPointsOnFixedImage, using the selector that matches
   * m_Intensitymode (0: IdentifyDrivingVoxels_GR, otherwise
   * IdentifyDrivingVoxels_GR_4I).
   *
   * \param FixedAttributeImage  attribute image to scan
   * \param FixedImageCriteria   qualification thresholds forwarded to the selector
   */
  template <class TInputImage, class TOutputImage>
  void 
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::IdentifyDrivingVoxelsInFixedImage(ImageAttributePointerType &FixedAttributeImage, std::vector<float> &FixedImageCriteria)
  {
    if(m_Intensitymode!=0)
      {
        IdentifyDrivingVoxels_GR_4I( FixedAttributeImage, m_PickedPointsOnFixedImage, FixedImageCriteria );
        return;
      }
    IdentifyDrivingVoxels_GR( FixedAttributeImage, m_PickedPointsOnFixedImage, FixedImageCriteria );
  }

  /**
   * Select the driving voxels of the moving image into
   * m_PickedPointsOnMovingImage, using the selector that matches
   * m_Intensitymode (0: IdentifyDrivingVoxels_GR, otherwise
   * IdentifyDrivingVoxels_GR_4I).
   *
   * \param MovingAttributeImage  attribute image to scan
   * \param MovingImageCriteria   qualification thresholds forwarded to the selector
   */
  template <class TFixedImage, class TMovingImage>
  void 
  HammerDeformableRegistrationImageFilter<TFixedImage, TMovingImage>
  ::IdentifyDrivingVoxelsInMovingImage(ImageAttributePointerType &MovingAttributeImage, std::vector<float> &MovingImageCriteria )
  {
    if(m_Intensitymode != 0)
      {
        IdentifyDrivingVoxels_GR_4I( MovingAttributeImage, m_PickedPointsOnMovingImage, MovingImageCriteria );
        return;
      }
    IdentifyDrivingVoxels_GR( MovingAttributeImage, m_PickedPointsOnMovingImage, MovingImageCriteria );
  }

  /**
   * Append to \a drivingVoxels every voxel of \a avPointer that passes
   * AttributeVectorType::IsQualifiedDrivingVoxel() and is not already in the
   * list.
   *
   * \param avPointer      attribute image to scan
   * \param drivingVoxels  in: voxels selected so far; out: the same list with
   *                       the newly qualified voxels appended
   * \param Criteria       thresholds forwarded to IsQualifiedDrivingVoxel()
   * \return the size of \a drivingVoxels after the scan
   */
  template <class TFixedImage, class TMovingImage>
  unsigned int 
  HammerDeformableRegistrationImageFilter<TFixedImage, TMovingImage>
  ::IdentifyDrivingVoxels( ImageAttributePointerType avPointer, std::vector<IndexType> & drivingVoxels, std::vector<float> &Criteria)
  {
    // Binary image marking the voxels that have already been selected.
    // Its geometry is taken from the image being scanned (not from
    // m_FixedAttributeImage) because the mask iterator below is advanced in
    // lock-step with the iterator over avPointer; both must cover the same
    // region. This matches IdentifyDrivingVoxels_GR.
    typename InputImageType::Pointer mask = InputImageType::New();
    mask->CopyInformation( avPointer );
    mask->SetRegions( mask->GetLargestPossibleRegion() );
    mask->Allocate();
    mask->FillBuffer( 0 );

    unsigned int nVoxels = drivingVoxels.size();
    for (unsigned int k = 0; k < nVoxels; k++)
      {
        typename InputImageType::IndexType idx = drivingVoxels[k];
        mask->SetPixel( idx, 1 );
      }

    itk::ImageRegionIteratorWithIndex<ImageAttributeType> 
      itAV( avPointer, avPointer->GetLargestPossibleRegion() );

    itk::ImageRegionIteratorWithIndex<InputImageType> 
      itMask( mask, mask->GetLargestPossibleRegion() );

    for ( itAV.GoToBegin(), itMask.GoToBegin(); !itAV.IsAtEnd(); ++itAV, ++itMask )
      {
        if ( itMask.Get() == 1 )   // voxel is already a driving voxel
          {
            continue;
          }
        AttributeVectorType a = itAV.Get();
        if ( a.IsQualifiedDrivingVoxel( Criteria ) )
          {
            drivingVoxels.push_back( itAV.GetIndex() );
            nVoxels ++;
          }
      }

    return nVoxels;
  }

  /**
   * Rebuild \a drivingVoxels from scratch with every voxel of \a avPointer
   * that passes AttributeVectorType::IsQualifiedDrivingVoxel_GR(), and write
   * an image of the selected voxels (value 250, background 0) to "DVImg.hdr".
   *
   * NOTE(review): the image file is written on every call, unconditionally;
   * this looks like a debugging dump - confirm whether it should be guarded.
   *
   * \param avPointer      attribute image to scan
   * \param drivingVoxels  cleared, then filled with the qualified voxel indices
   * \param Criteria       thresholds forwarded to IsQualifiedDrivingVoxel_GR()
   * \return the number of selected voxels
   */
  template <class TFixedImage, class TMovingImage>
  unsigned int 
  HammerDeformableRegistrationImageFilter<TFixedImage, TMovingImage>
  ::IdentifyDrivingVoxels_GR( ImageAttributePointerType avPointer, std::vector<IndexType> & drivingVoxels, std::vector<float> &Criteria)
  {
    // Image that records which voxels were selected; same geometry as the
    // attribute image.
    typename InputImageType::Pointer selectionImage = InputImageType::New();
    selectionImage->CopyInformation( avPointer );
    selectionImage->SetRegions( selectionImage->GetLargestPossibleRegion() );
    selectionImage->Allocate();
    selectionImage->FillBuffer( 0 );

    drivingVoxels.clear();

    int selectedCount = 0;
    int edgeCount = 0;   // only used by the disabled diagnostic at the end

    itk::ImageRegionIteratorWithIndex<ImageAttributeType> 
      attributeIt( avPointer, avPointer->GetLargestPossibleRegion() );
    attributeIt.GoToBegin();
    while ( !attributeIt.IsAtEnd() )
      {
        AttributeVectorType attribute = attributeIt.Get();
        if ( attribute.IsQualifiedDrivingVoxel_GR( Criteria ) )
          {
            IndexType voxel = attributeIt.GetIndex();
            drivingVoxels.push_back( voxel );
            selectionImage->SetPixel( voxel, 250 );
            selectedCount ++;
          }
        if ( attribute.GetEdge()>0 )
          {
            edgeCount++;
          }
        ++attributeIt;
      }

    // Dump the selection image to disk.
    typedef itk::ImageFileWriter< InputImageType > SelectionWriterType;
    typename SelectionWriterType::Pointer selectionWriter = SelectionWriterType::New();
    selectionWriter->SetInput(selectionImage);
    selectionWriter->SetFileName("DVImg.hdr");
    selectionWriter->Update();

    // std::cout<<"DV Number is "<<selectedCount<<" Edge Number is "<<edgeCount<<std::endl;
    return selectedCount;
  }


  /**
   * Store the three values i0, i1, i2 into m_Iterations[0], m_Iterations[1]
   * and m_Iterations[2] respectively.
   */
  template <class TFixedImage, class TMovingImage>
  void 
  HammerDeformableRegistrationImageFilter<TFixedImage, TMovingImage>
  ::SetIterations(unsigned int i0, unsigned int i1, unsigned int i2 )
  {
    const unsigned int requested[3] = { i0, i1, i2 };
    for (unsigned int slot = 0; slot < 3; ++slot)
      {
        this->m_Iterations[slot] = requested[slot];
      }
  }

  /**
   * Refine m_AffineTransform by registering m_MovingImage to m_FixedImage
   * with a Mattes mutual information metric, a regular-step gradient descent
   * optimizer and nearest-neighbour interpolation.
   *
   * The current parameters of m_AffineTransform are used as the starting
   * point. If the registration throws an itk::ExceptionObject, the exception
   * is printed to std::cerr and m_AffineTransform is left unchanged;
   * otherwise it receives the last transform parameters of the registration.
   */
  template <class TFixedImage, class TMovingImage>
  void 
  HammerDeformableRegistrationImageFilter<TFixedImage, TMovingImage>
  ::LinearInitialization( )
  {
    typedef itk::MattesMutualInformationImageToImageMetric<InputImageType, InputImageType> MutualInformationMetricType;
    typedef itk::RegularStepGradientDescentOptimizer                                       GradientOptimizerType;
    typedef itk::NearestNeighborInterpolateImageFunction<InputImageType, double>           NearestInterpolatorType;
    typedef itk::ImageRegistrationMethod<InputImageType, InputImageType>                   AffineRegistrationType;

    typename MutualInformationMetricType::Pointer miMetric        = MutualInformationMetricType::New();
    typename GradientOptimizerType::Pointer       stepOptimizer   = GradientOptimizerType::New();
    typename NearestInterpolatorType::Pointer     nnInterpolator  = NearestInterpolatorType::New();
    typename AffineRegistrationType::Pointer      affineRegistration = AffineRegistrationType::New();

    // Wire the registration components together.
    //  miMetric->SetUsePhysicalSpaceSampling( true );
    affineRegistration->SetMetric(        miMetric        );
    affineRegistration->SetOptimizer(     stepOptimizer   );
    affineRegistration->SetInterpolator(  nnInterpolator  );
    affineRegistration->SetTransform(     m_AffineTransform );
    affineRegistration->SetFixedImage(    m_FixedImage  );
    affineRegistration->SetMovingImage(   m_MovingImage );

    m_FixedImage->Print( std::cout );
    m_MovingImage->Print( std::cout );

    // Metric: fixed seed, 10000 spatial samples, 50 histogram bins.
    miMetric->ReinitializeSeed( 31415926 );
    miMetric->SetNumberOfSpatialSamples( 10000 );
    miMetric->SetNumberOfHistogramBins( 50 );

    // Optimizer: minimise, at most 200 iterations, step length in [0.0005, 0.2].
    stepOptimizer->SetNumberOfIterations ( 200 );
    stepOptimizer->SetMinimumStepLength ( 0.0005 );
    stepOptimizer->SetMaximumStepLength ( 0.2 );
    stepOptimizer->SetMinimize ( true );

    // Parameter scales: 1 for the first nine parameters, 100 from index 9 on.
    typename GradientOptimizerType::ScalesType parameterScales;
    parameterScales.SetSize( m_AffineTransform->GetNumberOfParameters() );
    parameterScales.Fill( 1 );
    ::size_t scaleIndex = 9;
    while (scaleIndex < parameterScales.GetSize())
      {
        parameterScales[scaleIndex] = 100;
        scaleIndex++;
      }
    stepOptimizer->SetScales( parameterScales );

    // Register over the whole fixed image.
    typename InputImageType::RegionType wholeFixedRegion = m_FixedImage->GetLargestPossibleRegion();
    std::cout << "Region used for registration: \n" << wholeFixedRegion << std::endl;
    affineRegistration->SetFixedImageRegion( wholeFixedRegion );

    std::cout << m_AffineTransform->GetParameters() << std::endl;
    affineRegistration->SetInitialTransformParameters( m_AffineTransform->GetParameters() );

    // Observer invoked on every optimizer iteration event.
    typename CommandIterationUpdate::Pointer iterationObserver = CommandIterationUpdate::New();
    stepOptimizer->AddObserver( itk::IterationEvent(), iterationObserver );
    iterationObserver->SetRegistration( affineRegistration );

    try 
      { 
        affineRegistration->StartRegistration(); 
      } 
    catch( itk::ExceptionObject & failure ) 
      { 
        std::cerr << "ExceptionObject caught !" << std::endl; 
        std::cerr << failure << std::endl; 
        return ;
      } 

    m_AffineTransform->SetParameters( affineRegistration->GetLastTransformParameters() );
  }

  /**
   * Core function that determines the correspondence for one model
   * (fixed-image) driving voxel in soft mode.
   *
   * Step 1: if an inverse force is stored for this driving voxel and the
   *         iteration ratio is at most ITER_THRD, the update is derived from
   *         that inverse force (magnitude limited to m_SubvolumeRadius>>1)
   *         and 10000.0 is returned.
   * Step 2: otherwise every candidate offset in the cube
   *         [-SearchRadius, SearchRadius]^3 (stride \a Step) is tested. A
   *         candidate whose point similarity passes the threshold is scored
   *         with SubVolumnMatching(). Candidates scoring above
   *         m_SubvolumnSimilarityThreshold contribute to a similarity-weighted
   *         mean offset, with a Gaussian weight on the mean absolute offset;
   *         the best-scoring candidate is tracked separately.
   *
   * \param FixedAttributeImage   attribute image of the fixed image
   * \param MovingAttributeImage  attribute image of the moving image
   * \param DeformFld             current deformation field
   * \param DrivingPointIndex     index into m_PickedPointsOnFixedImage
   * \param DeformFld_Last        deformation field of the previous round
   * \param DeformationUpdate     out: displacement update for the driving voxel
   *                              (zero when no candidate qualified)
   * \param SearchRadius          half-width of the search cube, in voxels
   * \param Step                  stride of the search
   * \return 10000.0 when the inverse force was used; otherwise the best
   *         subvolume similarity found (0 if none), or
   *         m_SubvolumnSimilarityThreshold + 0.001 when the best candidate
   *         overrides the weighted mean.
   */
  template <class TInputImage, class TOutputImage>
  float 
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::SoftDetermineCorrespondenceOnOneDrivingVoxel(ImageAttributePointerType &FixedAttributeImage, ImageAttributePointerType &MovingAttributeImage, DeformationFieldPointer &DeformFld, int DrivingPointIndex, DeformationFieldPointer DeformFld_Last, DeformationVectorType &DeformationUpdate, int SearchRadius, int Step) const
  {
    int i, j, k;
    float PointSimilarity, NeighborhoodSimilarity, MaxSimilarityDegree;
    float DisplacementMagnitude;
    RegionType dummyRegion = FixedAttributeImage->GetLargestPossibleRegion();
    AttributeVectorType TemplateFeature, SubjectFeature;
    DeformationVectorType DeformationOnParticularPoint, TentativeWarp;
    DeformationUpdate.Fill( 0 );
    float MinDistance, MinDistThresh;
    DeformationVectorType MeanDeform, MaxDeform; 
    float OverallSimilarity;
    float weight, level;

    // Sampling stride handed to SubVolumnMatching().
    int SmplStep =  m_SubvolumeNeighborhood.size()/20;
    IndexType ImageIndex;                             
    ImageIndex = m_PickedPointsOnFixedImage[DrivingPointIndex];

    //Step 1: check whether the input point has an inverse force
    float Displacement_Magnitude = ComputeVectorMagnitude(m_InverseDisplacement[DrivingPointIndex]);
    if(Displacement_Magnitude>0 && m_IterationRatio<=ITER_THRD)
      {    
        DeformationVectorType CurrentDeformation, PreviousDeformation;
        CurrentDeformation = DeformFld->GetPixel(ImageIndex);
        PreviousDeformation = DeformFld_Last->GetPixel(ImageIndex);
        DeformationUpdate = PreviousDeformation - CurrentDeformation + m_InverseDisplacement[DrivingPointIndex]*(1.0+(1.0 + m_IterationRatio))/2.0;

        // Limit the update to half of the subvolume radius.
        DisplacementMagnitude = ComputeVectorMagnitude(DeformationUpdate);
        if(DisplacementMagnitude > (m_SubvolumeRadius>>1))
          {
            DeformationUpdate = DeformationUpdate/DisplacementMagnitude*(m_SubvolumeRadius>>1);
          }
        return 10000.0 ;
      }

    //Step 2: find the correspondence of the particular point
    MaxSimilarityDegree = 0;
    TentativeWarp.Fill(0);
    TemplateFeature = FixedAttributeImage->GetPixel(ImageIndex);
    DeformationOnParticularPoint = DeformFld->GetPixel(ImageIndex);
    MinDistThresh = 10000.0;
    OverallSimilarity = 0;
    for(int s=0;s<InputImageDimension;s++)
      {
        MeanDeform[s] = 0.0;
        MaxDeform[s] = 0.0;
      }

    // Note the axis convention: j offsets axis 0, i offsets axis 1, k offsets axis 2.
    for(i=-SearchRadius;i<=SearchRadius;i+=Step)
      for(j=-SearchRadius;j<=SearchRadius;j+=Step)
        for(k=-SearchRadius;k<=SearchRadius;k+=Step)
          {
            IndexType SubjIdx;
            SubjIdx[0] = int(DeformationOnParticularPoint[0] + ImageIndex[0] + j + 0.5);
            SubjIdx[1] = int(DeformationOnParticularPoint[1] + ImageIndex[1] + i + 0.5);
            SubjIdx[2] = int(DeformationOnParticularPoint[2] + ImageIndex[2] + k + 0.5);

            if(!dummyRegion.IsInside(SubjIdx))
              {
                continue;
              }
            SubjectFeature = MovingAttributeImage->GetPixel(SubjIdx);
            PointSimilarity = SimilarityBetweenTwoImgAttribute(TemplateFeature, SubjectFeature, false);
            //Step 2.2: compare the similarity between the two neighborhoods
            if(PointSimilarity>m_PointMatchingThreshold || (m_IsBigVN== true && TemplateFeature.GetVentricleVolume()>0 && SubjectFeature.GetVentricleVolume()>0))
              {      
                TentativeWarp[0] = j;
                TentativeWarp[1] = i;
                TentativeWarp[2] = k;
                NeighborhoodSimilarity = SubVolumnMatching(FixedAttributeImage, MovingAttributeImage, DeformFld, ImageIndex, TentativeWarp, m_SubvolumeNeighborhood, SmplStep, &MinDistance, MinDistThresh);

                // Track the single best candidate (unweighted score).
                if(NeighborhoodSimilarity>MaxSimilarityDegree && NeighborhoodSimilarity>m_SubvolumnSimilarityThreshold)
                  {      
                    for(int s=0;s<InputImageDimension;s++)
                      MaxDeform[s] = TentativeWarp[s];
                    MaxSimilarityDegree = NeighborhoodSimilarity;
                  }

                // Accumulate the similarity-weighted mean offset.
                if(NeighborhoodSimilarity > m_SubvolumnSimilarityThreshold)
                  {
                    // Mean absolute offset over the three axes. The previous
                    // code used component 0 three times, which ignored the
                    // offsets along axes 1 and 2.
                    level = (fabs(TentativeWarp[0])+fabs(TentativeWarp[1])+fabs(TentativeWarp[2]))/3.0 ;
                    weight = exp(-level*level/(2.*SearchRadius*SearchRadius)) ;
                    NeighborhoodSimilarity *= weight ;
                    OverallSimilarity += NeighborhoodSimilarity ;	
                    for(int s=0;s<InputImageDimension;s++)
                      MeanDeform[s] += TentativeWarp[s]*NeighborhoodSimilarity;
                  }
              }
          }//end of search loops

    if(OverallSimilarity > 0)
      {
        for(int s=0;s<InputImageDimension;s++)
          DeformationUpdate[s] = MeanDeform[s]/OverallSimilarity;
      }

    // From resolution level 1 on and once the iteration ratio has reached
    // ITER_THRD, the best candidate replaces the weighted mean.
    if( m_CurrentResolution>=1 && m_IterationRatio>=ITER_THRD && MaxSimilarityDegree>m_SubvolumnSimilarityThreshold )
      {
        for(int s=0;s<InputImageDimension;s++)
          DeformationUpdate[s] = MaxDeform[s];
        MaxSimilarityDegree = m_SubvolumnSimilarityThreshold + 0.001 ;
      }

    return ( MaxSimilarityDegree );
  }

  /**
   * Soft-mode estimation of the inverse forces that the moving (subject)
   * driving voxels exert on the model driving voxels.
   *
   * The current positions of the model driving voxels
   * (m_ModelDrivingPointDisplacement) are rasterised into an index image. A
   * sub-sample of the moving-image driving voxels is then visited; for every
   * model driving voxel found within \a SearchRadius whose attribute vector
   * is similar enough, the offset that would move the model voxel onto the
   * subject voxel is scored with SubVolumnMatching(). Each match adds that
   * offset, weighted by its Gaussian-damped score, to the model voxel's
   * entry of m_InverseDisplacement. Finally every entry is normalised by the
   * sum of its weights, giving a weighted mean offset.
   *
   * Changes with respect to the previous implementation:
   *  - the accumulated vector is the weighted tentative warp; the old code
   *    added a vector that was filled with zero and never updated, so the
   *    inverse force was always zero;
   *  - the weight sum is incremented once per match instead of once per
   *    dimension, so the final division yields a proper weighted mean;
   *  - the number of samples is at least 1, avoiding a division by zero when
   *    fewer than two model driving voxels exist.
   *
   * \param FixedAttributeImage   attribute image of the fixed image
   * \param MovingAttributeImage  attribute image of the moving image
   * \param DeformFld             current deformation field
   * \param SearchRadius          half-width of the search cube, in voxels
   */
  template <class TInputImage, class TOutputImage>
  void 
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::SoftFindingInverseForceFromSubject(ImageAttributePointerType &FixedAttributeImage, ImageAttributePointerType &MovingAttributeImage, DeformationFieldPointer &DeformFld, int SearchRadius) const
  {
    typedef itk::Image<int, InputImageDimension> ModelDrivingPointImageType;
    typename ModelDrivingPointImageType::Pointer ModelDrivingPointImage = ModelDrivingPointImageType::New();
    float MinDistThresh;
    RegionType dummyRegion;
    unsigned int PointID;

    int SmplStep;
    IndexType SubjIndex;
    AttributeVectorType TemplateFeature, SubjectFeature;
    DeformationVectorType TentativeWarp;

    float level, weight;
    int TotalMatchingPoints;

    unsigned int MdlPickedPointNum;
    std::vector<float> Multiple;   // per model point: sum of match weights
    int incre;

    // Stride of the neighbourhood search, never smaller than 1.
    incre = 2*SearchRadius/6 ; 
    if(incre<1)
      incre=1 ;  

    // Index image: -1 everywhere except at the current positions of the
    // model driving voxels, which hold the ID of that driving voxel.
    dummyRegion = FixedAttributeImage->GetLargestPossibleRegion();
    ModelDrivingPointImage->SetRegions(dummyRegion);
    ModelDrivingPointImage->Allocate();

    typedef itk::ImageRegionIterator<ModelDrivingPointImageType> IteratorType;
    IteratorType Itor(ModelDrivingPointImage, dummyRegion);
    for(Itor.GoToBegin();!Itor.IsAtEnd();++Itor)
      {
        Itor.Set(-1);
      }

    // Sampling stride handed to SubVolumnMatching().
    SmplStep = m_SubvolumeNeighborhood.size()/20;

    MdlPickedPointNum = m_PickedPointsOnFixedImage.size();
    Multiple.resize(MdlPickedPointNum);
    for( PointID=0; PointID < MdlPickedPointNum; PointID++)
      {
        for(int s=0;s<InputImageDimension;s++)
          {
            SubjIndex[s] = (int)(m_ModelDrivingPointDisplacement[PointID][s]);
            m_InverseDisplacement[PointID][s] = 0;
          }
        Multiple[PointID]= 0;
        if(dummyRegion.IsInside(SubjIndex)==false)
          {
            continue;
          }
        ModelDrivingPointImage->SetPixel(SubjIndex, PointID);
      }

    // Number of subject driving voxels to visit: half the number of model
    // driving voxels, at most all subject driving voxels, at most 20000 and
    // at least 1 (so that the step below is finite).
    unsigned int TotalSamples = m_PickedPointsOnFixedImage.size()/2;
    if(TotalSamples>m_PickedPointsOnMovingImage.size()) 
      TotalSamples = m_PickedPointsOnMovingImage.size() ;
    if(TotalSamples>20000)  
      TotalSamples = 20000 ;
    if(TotalSamples<1)
      TotalSamples = 1 ;
    float StepDesign = (float)(m_PickedPointsOnMovingImage.size())/(float)TotalSamples ; 
    MinDistThresh = 10000.0;

    // A subvolume similarity must exceed this value to count as a match.
    const float MinAcceptedSimilarity = 0;

    PointID = 0;
    TotalMatchingPoints = 0;

    for(float l=0; PointID < m_PickedPointsOnMovingImage.size(); l += StepDesign, PointID=static_cast<unsigned int>(l))
      {    
        SubjectFeature = MovingAttributeImage->GetPixel(m_PickedPointsOnMovingImage[PointID]);

        // Axis convention: j offsets axis 0, i offsets axis 1, k offsets axis 2.
        for(int i=-SearchRadius;i<=SearchRadius;i+=incre)
          {
            for(int j=-SearchRadius;j<=SearchRadius;j+=incre)
              {
                for(int k=-SearchRadius;k<=SearchRadius;k+=incre)
                  {
                    SubjIndex[0] = static_cast<int>(m_PickedPointsOnMovingImage[PointID][0]) + j;
                    SubjIndex[1] = static_cast<int>(m_PickedPointsOnMovingImage[PointID][1]) + i;
                    SubjIndex[2] = static_cast<int>(m_PickedPointsOnMovingImage[PointID][2]) + k;

                    if ( !dummyRegion.IsInside(SubjIndex) )
                      {
                        continue;
                      }

                    int MdlPickedPointID = ModelDrivingPointImage->GetPixel(SubjIndex);
                    if( MdlPickedPointID < 0)
                      {
                        continue;   // no model driving voxel at this position
                      }

                    TemplateFeature = m_ModelAttributeVector[MdlPickedPointID];
                    float PointSimilarity = SimilarityBetweenTwoImgAttribute(TemplateFeature, SubjectFeature, false);
                    if(PointSimilarity>m_PointMatchingThreshold)
                      {
                        // Offset that moves the model voxel (found at
                        // subject voxel + (j,i,k)) onto the subject voxel.
                        TentativeWarp[0] = -j;
                        TentativeWarp[1] = -i;
                        TentativeWarp[2] = -k;
                        typename InputImageType::IndexType MdlIndex = m_PickedPointsOnFixedImage[MdlPickedPointID];
                        float MinDistance = 0;
                        float SubvolumnSimilarity = SubVolumnMatching(FixedAttributeImage, MovingAttributeImage, DeformFld, MdlIndex, TentativeWarp, m_SubvolumeNeighborhood, SmplStep, &MinDistance, MinDistThresh);

                        if(SubvolumnSimilarity>MinAcceptedSimilarity)
                          {
                            TotalMatchingPoints ++;
                            // Gaussian damping on the mean absolute offset.
                            level = (fabs(TentativeWarp[0])+fabs(TentativeWarp[1])+fabs(TentativeWarp[2]))/3.0 ;
                            weight = exp(-level*level/(2.*SearchRadius*SearchRadius)) ;
                            SubvolumnSimilarity *= weight ;
                            for(int s=0;s<InputImageDimension;s++)
                              {
                                m_InverseDisplacement[MdlPickedPointID][s] += TentativeWarp[s]*SubvolumnSimilarity;
                              }
                            Multiple[MdlPickedPointID] += SubvolumnSimilarity;
                          }
                      }
                  }//end of for k
              }//end of for j
          }//end of for i
      }
    std::cout<<TotalMatchingPoints<<" Matched."<<std::endl;

    // Turn the weighted sums into weighted means.
    int PickupNum = 0;
    for(PointID=0;PointID<MdlPickedPointNum;PointID++)
      {
        if(Multiple[PointID]>0)
          {
            for(int s=0;s<InputImageDimension;s++)
              {
                m_InverseDisplacement[PointID][s] /= Multiple[PointID];
              }    
            PickupNum++;
          }    
      }
    std::cout<<"Inverse force ratio = "<<PickupNum<<"/"<<m_InverseDisplacement.size()<<std::endl;
  }

  /**
   * Recompute TPS_Param and TPS_Affine from the moving control point
   * coordinates.
   *
   * \a MovingCord (one row per control point) is zero-padded with four
   * extra rows to form a (rows+4) x 4 matrix, which is multiplied on the
   * left by TPS_Psi. The first `rows` rows of the product become TPS_Param
   * and the last four rows become TPS_Affine.
   *
   * \param MovingCord  control point coordinates, one row per point
   */
  template <class TInputImage, class TOutputImage>
  void 
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::UpdateTPSParameter_Bookstein_V2(vnl_matrix<double> &MovingCord) 
  {
    int controlPointCount = MovingCord.rows();

    // Right-hand side: moving coordinates on top, four zero rows below.
    vnl_matrix<double> paddedTarget;
    paddedTarget.set_size(controlPointCount+4, 4);
    paddedTarget.fill(0);
    paddedTarget.update(MovingCord, 0, 0);

    vnl_matrix<double> solution = TPS_Psi * paddedTarget;

    TPS_Param  = solution.extract(controlPointCount, 4, 0, 0);
    TPS_Affine = solution.extract(4, 4, controlPointCount, 0);
  }

  /**
   * Build the (PointNum+4) x (PointNum+4) thin-plate-spline system matrix
   * for the given control points and store its SVD-based inverse in TPS_Psi.
   *
   * Layout of the matrix before inversion:
   *   - upper-left PointNum x PointNum block: symmetric kernel values read
   *     from LookingUpTable, indexed by the coordinate differences of each
   *     point pair shifted by \a HalfBlockSize, plus the identity matrix;
   *   - the last four rows/columns: the four columns of \a FixedPoint
   *     (and their transpose);
   *   - lower-right 4 x 4 block: zero.
   *
   * NOTE(review): \a Lemda is not used anywhere in this function; the
   * identity is added with a factor of 1. Confirm whether it was meant to
   * scale the identity term.
   *
   * \param FixedPoint     PointNum x 4 matrix of control point coordinates
   * \param PointNum       number of control points
   * \param Lemda          unused
   * \param HalfBlockSize  offset added to coordinate differences before the
   *                       table lookup
   */
  template <class TInputImage, class TOutputImage>
  void 
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::PrepareTPSKernel_Compute_L_InverseBookStein(vnl_matrix<double> &FixedPoint, int PointNum, double Lemda, int HalfBlockSize) 
  {
    TPS_Psi.set_size(PointNum+4, PointNum+4);
    TPS_Psi.fill(0);

    // Kernel block: look up each pair once and mirror it.
    for (int row=0; row<PointNum; row++)
      {
        for (int col=row; col<PointNum; col++)
          {
            // Coordinate differences are truncated to int before the lookup.
            int x = static_cast<int>(FixedPoint(row, 1) - FixedPoint(col, 1) + HalfBlockSize);
            int y = static_cast<int>(FixedPoint(row, 2) - FixedPoint(col, 2) + HalfBlockSize);
            int z = static_cast<int>(FixedPoint(row, 3) - FixedPoint(col, 3) + HalfBlockSize);
            TPS_Psi(row, col) = LookingUpTable[z][x][y];
            TPS_Psi(col, row) = TPS_Psi(row, col);
          }
      }

    // Border blocks: control point coordinates and their transpose.
    for (int point=0; point<PointNum; point++)
      {
        for (int c=0; c<4; c++)
          {
            TPS_Psi(PointNum+c, point) = FixedPoint(point, c);
            TPS_Psi(point, PointNum+c) = FixedPoint(point, c);
          }
      }

    // Add the identity to the kernel block.
    vnl_matrix<double> identity;
    identity.set_size(PointNum, PointNum);
    identity.set_identity();
    vnl_matrix<double> kernel = TPS_Psi.extract(PointNum, PointNum, 0, 0);
    kernel = kernel + identity;
    TPS_Psi.update(kernel, 0, 0);

    // Replace the system matrix by its inverse.
    vnl_svd<double> decomposition(TPS_Psi);
    TPS_Psi = decomposition.inverse();
  }

  /**
   * Mean Euclidean distance between the deformation field sampled at the
   * first \a Num indices of \a IndexArray and the corresponding vectors of
   * \a DeformArray.
   *
   * \param DeformFld    deformation field to sample
   * \param DeformArray  expected deformation vectors, one per index
   * \param IndexArray   voxel indices at which the field is sampled
   * \param Num          number of entries to compare; the sum is divided by
   *                     this value without checking for zero
   * \return the summed distances divided by \a Num
   */
  template <class TInputImage, class TOutputImage>
  float 
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::GetCurrentFittingError(DeformationFieldPointer DeformFld,  DeforamtionVectorArrayType DeformArray, IndexArrayType IndexArray, int Num) const
  {
    float errorSum = 0;

    for(int point=0; point<Num; ++point)
      {
        IndexType voxel;
        for(int d=0; d<InputImageDimension; ++d)
          {
            voxel[d] = IndexArray[point][d];
          }

        DeformationVectorType fieldValue = DeformFld->GetPixel(voxel);

        float squaredDistance = 0;
        for(int d=0; d<InputImageDimension; ++d)
          {
            squaredDistance += HammerSQR(fieldValue[d]-DeformArray[point][d]);
          }
        errorSum += sqrt(squaredDistance);
      }

    return (errorSum/Num);
  }

  /**
   * Scan a block of voxels and compute (a) the mean magnitude of the
   * residual deformation over the driving points found in the block and
   * (b) the number of voxels whose attribute component 1 is zero.
   *
   * Voxel coordinates that fall outside the image are clamped to the image
   * border, so border voxels may be visited more than once.
   *
   * \param Img           attribute image; component [1] == 0 counts a voxel
   * \param DeformArray   residual deformation per driving point
   * \param IndexImg      image holding the driving point ID at each voxel,
   *                      negative where there is none
   * \param Return_Value  out: mean residual magnitude (0 if the block holds
   *                      no driving point)
   * \param StartPoint    first voxel of the block
   * \param BlockSize     extent of the block along each axis
   * \return the number of visited voxels whose attribute component 1 is zero
   */
  template <class TInputImage, class TOutputImage>
  int 
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::EstimateResidualError(ImageAttributePointerType Img, DeforamtionVectorArrayType DeformArray, IntImagePointer IndexImg, float *Return_Value, IndexType StartPoint, IndexType BlockSize) const
  {                      
    RegionType imageRegion = Img->GetLargestPossibleRegion();
    // Naming follows the file's convention: X is axis 1, Y is axis 0.
    int Image_Z_Size = imageRegion.GetSize()[2];
    int Image_X_Size = imageRegion.GetSize()[1];
    int Image_Y_Size = imageRegion.GetSize()[0];

    float errorSum = 0;
    float drivingPointCount = 0;
    int zeroAttributeCount = 0;

    IndexType voxel;
    for(int dz=0; dz<BlockSize[2]; dz++)
      {
        voxel[2] = HammerMax(0, HammerMin((StartPoint[2]+dz), Image_Z_Size-1));
        for(int dx=0; dx<BlockSize[1]; dx++)
          {
            voxel[1] = HammerMax(0, HammerMin((StartPoint[1]+dx), Image_X_Size-1));
            for(int dy=0; dy<BlockSize[0]; dy++)
              {
                voxel[0] = HammerMax(0, HammerMin((StartPoint[0]+dy), Image_Y_Size-1));

                if(Img->GetPixel(voxel)[1]==0)
                  {
                    zeroAttributeCount++;
                  }

                int drivingPointID = IndexImg->GetPixel(voxel);
                if(drivingPointID>=0)
                  {
                    float squaredMagnitude = HammerSQR(DeformArray[drivingPointID][0]) + HammerSQR(DeformArray[drivingPointID][1]) + HammerSQR(DeformArray[drivingPointID][2]);
                    errorSum += sqrt(squaredMagnitude);
                    drivingPointCount += 1.0;
                  }
              }
          }
      }

    if(drivingPointCount>0)
      {
        errorSum /= drivingPointCount;
      }
    *Return_Value = errorSum;
    return zeroAttributeCount;
  }

  /**
   * Collect the driving points that fall inside one block and initialise
   * the block-local deformation, index image and coordinate matrices.
   *
   * For every block voxel (image coordinates clamped to the image border):
   *   - if DrivingPointImage holds a non-negative ID there, the residual
   *     deformation of that driving point is written to \a BlockDeform, the
   *     next free row of \a FixedCord / \a MovingCord receives the block
   *     coordinates (columns 1..3; MovingCord additionally offset by the
   *     residual deformation) and \a BlockIndex receives that row number;
   *   - otherwise \a BlockIndex is set to -1 and \a BlockDeform to zero.
   * Column 0 of the coordinate matrices is not touched.
   *
   * \param Residual_Deform    residual deformation per driving point
   * \param BlockDeform        out: block-sized deformation image
   * \param BlockIndex         out: block-sized image of row numbers / -1
   * \param FixedCord          out: block coordinates of the driving points
   * \param MovingCord         out: block coordinates plus residual deformation
   * \param StartPoint         image index of the block origin
   * \param BlockLength        block extent along each axis
   * \param DrivingPointImage  image holding driving point IDs (negative: none)
   * \param Image_X_Size       clamp limit for index component 1
   * \param Image_Y_Size       clamp limit for index component 0
   * \param Image_Z_Size       clamp limit for index component 2
   * \return the number of driving points found in the block
   */
  template <class TInputImage, class TOutputImage>
  int 
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::EstimateInitialControlPoint(DeforamtionVectorArrayType Residual_Deform, DeformationFieldPointer BlockDeform, IntImagePointer BlockIndex, \
                                vnl_matrix<double> &FixedCord, vnl_matrix<double> &MovingCord, IndexType StartPoint, IndexType BlockLength, IntImagePointer DrivingPointImage,\
                                int Image_X_Size, int Image_Y_Size, int Image_Z_Size) const
  {    
    DeformationVectorType noDeformation;
    noDeformation.Fill(0);

    int foundCount = 0;
    IndexType imageVoxel, blockVoxel;

    for(int bz=0; bz<BlockLength[2]; bz++)
      {
        imageVoxel[2] = HammerMax(HammerMin(Image_Z_Size-1, StartPoint[2]+bz), 0);
        for(int bx=0; bx<BlockLength[1]; bx++)
          {
            imageVoxel[1] = HammerMax(HammerMin(Image_X_Size-1, StartPoint[1]+bx), 0);
            for(int by=0; by<BlockLength[0]; by++)
              {
                imageVoxel[0] = HammerMax(HammerMin(Image_Y_Size-1, StartPoint[0]+by), 0);
                blockVoxel[0] = by;
                blockVoxel[1] = bx;
                blockVoxel[2] = bz;

                int drivingPointID = DrivingPointImage->GetPixel(imageVoxel);
                if(drivingPointID<0)
                  {
                    // No driving point here.
                    BlockIndex->SetPixel(blockVoxel, -1);
                    BlockDeform->SetPixel(blockVoxel, noDeformation);
                    continue;
                  }

                DeformationVectorType residual;
                for(int d=0; d<InputImageDimension; d++)
                  {
                    residual[d] = Residual_Deform[drivingPointID][d];
                  }
                BlockDeform->SetPixel(blockVoxel, residual);

                // Column 1 follows index component 1, column 2 follows
                // index component 0, column 3 follows index component 2.
                MovingCord(foundCount,1) = Residual_Deform[drivingPointID][1] + bx;
                MovingCord(foundCount,2) = Residual_Deform[drivingPointID][0] + by;
                MovingCord(foundCount,3) = Residual_Deform[drivingPointID][2] + bz;
                FixedCord(foundCount,1) = bx;
                FixedCord(foundCount,2) = by;
                FixedCord(foundCount,3) = bz;

                BlockIndex->SetPixel(blockVoxel, foundCount);
                foundCount ++;
              }
          }
      }
    return foundCount;
  }

  /**
   * Thin out the control points of a block.
   *
   * BlockIndex is first reset to a copy of PrevBlockIndexImage.  The PrevNum
   * points listed in PrevFixedCord (columns 1..3 = dim 1, dim 0, dim 2) are
   * then visited in order.  A point whose BlockIndex label is still
   * non-negative is kept; every other labelled point reachable through the
   * offsets produced by CreatePointMatchingNeighbor(Cluster_Radius) and lying
   * inside the block gets its BlockIndex label set to -1, so it is skipped
   * when reached later.  Kept points are written to consecutive rows of
   * FixedCord / MovingCord (position, and position plus BlockDeform value).
   *
   * StartPoint, DrivingPointImage and the three image sizes are not used.
   *
   * \return number of points kept.
   */
  template <class TInputImage, class TOutputImage>
  int 
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::ClusteringControlPointInBlock(DeformationFieldPointer BlockDeform, IntImagePointer BlockIndex, int Cluster_Radius,
                                  vnl_matrix<double> &FixedCord, vnl_matrix<double> &MovingCord, IntImagePointer PrevBlockIndexImage, vnl_matrix<double> &PrevFixedCord, int PrevNum,
                                  IndexType StartPoint, IndexType BlockLength, IntImagePointer DrivingPointImage, int Image_X_Size, int Image_Y_Size, int Image_Z_Size)
  {
    IndexArrayType clusterOffsets;
    CreatePointMatchingNeighbor(clusterOffsets, Cluster_Radius);

    typedef typename itk::ImageRegionIterator<IntImageType> IndexImageIteratorType;

    // Restart from the unclustered labelling.
    IndexImageIteratorType target(BlockIndex, BlockIndex->GetLargestPossibleRegion());
    IndexImageIteratorType source(PrevBlockIndexImage, PrevBlockIndexImage->GetLargestPossibleRegion());
    target.GoToBegin();
    source.GoToBegin();
    while(!target.IsAtEnd())
      {
      target.Set(source.Get());
      ++target;
      ++source;
      }

    int keptCount = 0;
    for(int p=0; p<PrevNum; ++p)
      {
      IndexType centre;
      centre[1] = PrevFixedCord(p, 1);
      centre[0] = PrevFixedCord(p, 2);
      centre[2] = PrevFixedCord(p, 3);

      // Already absorbed by a point kept earlier.
      if(BlockIndex->GetPixel(centre) < 0)
        {
        continue;
        }

      // Sum of member deformations and member count; accumulated but not
      // used by the output below.
      DeformationVectorType memberSum;
      for(unsigned int d=0; d<InputImageDimension; d++)
        {
        memberSum[d] = 0;
        }
      float memberCount = 0;

      for(unsigned int q=0; q<clusterOffsets.size(); q++)
        {
        IndexType member;
        member[0] = centre[0] + clusterOffsets[q][0];
        member[1] = centre[1] + clusterOffsets[q][1];
        member[2] = centre[2] + clusterOffsets[q][2];

        const bool belowBlock = member[0]<0 || member[1]<0 || member[2]<0;
        const bool aboveBlock = member[0]>=BlockLength[0] || member[1]>=BlockLength[1] || member[2]>=BlockLength[2];
        if(belowBlock || aboveBlock)
          {
          continue;
          }
        if(PrevBlockIndexImage->GetPixel(member) < 0)
          {
          continue;
          }

        DeformationVectorType memberDeform = BlockDeform->GetPixel(member);
        for(int c=0; c<3; c++)
          {
          memberSum[c] += memberDeform[c];
          }
        // Suppress every cluster member except the centre itself.
        const bool isCentre = member[0]==centre[0] && member[1]==centre[1] && member[2]==centre[2];
        if(!isCentre)
          {
          BlockIndex->SetPixel(member, -1);
          }
        memberCount += 1.0;
        }

      DeformationVectorType centreDeform = BlockDeform->GetPixel(centre);
      FixedCord(keptCount, 1) = centre[1];
      FixedCord(keptCount, 2) = centre[0];
      FixedCord(keptCount, 3) = centre[2];
      MovingCord(keptCount, 1) = centre[1] + centreDeform[1];
      MovingCord(keptCount, 2) = centre[0] + centreDeform[0];
      MovingCord(keptCount, 3) = centre[2] + centreDeform[2];
      ++keptCount;
      }
    return keptCount;
  }


  /**
   * Evaluate the current thin-plate spline at every block voxel whose mask
   * value is 0 and store the resulting displacement in BlockDeformFld.
   *
   * The whole of BlockDeformFld is first reset to zero.  For each voxel with
   * MaskImg == 0 a row [1, idx[1], idx[0], idx[2]] is appended to U, the
   * kernel row is read from LookingUpTable using the offset to every control
   * point in FixedPoint (shifted by BlockSize so the table index is
   * non-negative), and the mapped position is
   *     D = U * TPS_Affine + TPS_Kernel * TPS_Param.
   * The displacement written to the field is D minus the voxel position.
   *
   * \param BlockDeformFld  block-sized field that receives the displacements
   * \param MaskImg         block-sized mask; only voxels equal to 0 are evaluated
   * \param NDP_Num         number of rows allocated for U and TPS_Kernel; the
   *                        caller is expected to pass the number of zero-valued
   *                        voxels of MaskImg
   * \param FixedPoint      control points, columns 1..3 in (dim 1, dim 0, dim 2) order
   * \param BlockSize       offset added to coordinate differences for the table lookup
   * \param PointNum        number of control points (rows of FixedPoint used)
   */
  template <class TInputImage, class TOutputImage>
  void 
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::GenerateBlockDeform_Only_NonDrivingPoints(DeformationFieldPointer BlockDeformFld, InputImagePointer MaskImg, int NDP_Num, \
                                              vnl_matrix<double> &FixedPoint, int BlockSize, int PointNum) 
  {
    int i, j, index;
    int x, y, z;
    vnl_matrix<double> U;
    vnl_matrix<double> D;
    typedef typename OutputImageType::IndexType LocalIndexType;
    LocalIndexType idx;
    DeformationVectorType dfm, zero_dfm;
    typedef typename itk::ImageRegionIterator<InputImageType> MaskImageIteratorType;
    MaskImageIteratorType Mask_Iter(MaskImg, MaskImg->GetLargestPossibleRegion());
    DeformationFieldIteratorType BlockDeform_Iter(BlockDeformFld, BlockDeformFld->GetLargestPossibleRegion());

    U.set_size(NDP_Num, 4);
    TPS_Kernel.set_size(NDP_Num, PointNum);
    U.set_column(0, 1);

    for(int s=0;s<InputImageDimension;s++)
      zero_dfm[s] = 0;

    // Pass 1: clear the field and list the homogeneous coordinates of every
    // voxel that has to be evaluated.
    index = 0;
    for(Mask_Iter.GoToBegin(),BlockDeform_Iter.GoToBegin();!BlockDeform_Iter.IsAtEnd();++Mask_Iter,++BlockDeform_Iter)
      {
      BlockDeform_Iter.Set(zero_dfm);
      if(Mask_Iter.Get()==0)
        {
        U(index,1) = BlockDeform_Iter.GetIndex()[1];
        U(index,2) = BlockDeform_Iter.GetIndex()[0];
        U(index,3) = BlockDeform_Iter.GetIndex()[2];
        index++;
        }
      }

    // Obtain the TPS kernel from the precomputed table.
    for(i=0;i<NDP_Num;i++)
      for(j=0;j<PointNum;j++)
        {
        x = U(i, 1) - FixedPoint(j, 1) + BlockSize;
        y = U(i, 2) - FixedPoint(j, 2) + BlockSize;
        z = U(i, 3) - FixedPoint(j, 3) + BlockSize;
        TPS_Kernel(i,j) = LookingUpTable[z][x][y];
        }

    D = U * TPS_Affine + TPS_Kernel * TPS_Param;

    // Pass 2: same traversal order as pass 1, so `index` addresses the row
    // of D that belongs to the current voxel.
    index = 0;
    for(Mask_Iter.GoToBegin(),BlockDeform_Iter.GoToBegin();!BlockDeform_Iter.IsAtEnd();++Mask_Iter,++BlockDeform_Iter)
      {
      if(Mask_Iter.Get()==0)
        {
        idx = BlockDeform_Iter.GetIndex();
        dfm[1] = D(index, 1) - idx[1];
        dfm[0] = D(index, 2) - idx[0];
        dfm[2] = D(index, 3) - idx[2];
        // Store the displacement; previously it was computed and then
        // dropped, which left the whole block field at zero.
        BlockDeform_Iter.Set(dfm);
        index++;
        }
      }
  }

  /**
   * Fill a cubic table with the negated Euclidean length of every integer
   * offset in [-Offset, Offset]^3.
   *
   * The entry for offset (k, i, j) is stored at
   * Table[k+Offset][i+Offset][j+Offset], so Table must provide at least
   * 2*Offset+1 entries along each of its three levels.
   */
  template <class TInputImage, class TOutputImage>
  void 
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::Make3DLookupTable(float ***Table, int Offset) const
  {
    const int side = 2*Offset + 1;
    for(int a=0; a<side; ++a)
      {
      const int da = a - Offset;
      for(int b=0; b<side; ++b)
        {
        const int db = b - Offset;
        for(int c=0; c<side; ++c)
          {
          const int dc = c - Offset;
          float squaredLength = da*da + db*db + dc*dc;
          Table[a][b][c] = -sqrt(squaredLength);
          }
        }
      }
  }


  /**
   * Fit the residual (DeformArray - PrevDeformFld) at the template driving
   * points with block-wise thin-plate splines and write the interpolated
   * residual into OutputDeformFld.
   *
   * The image is covered by cubic blocks of edge m_BlockSize whose anchors
   * advance by m_OverlapSize and are shifted back by
   * (m_BlockSize - m_OverlapSize)/2.
   *
   * Pass 1 runs only while m_TPSBufferOccupied is false.  For every block it
   *   - skips the block when EstimateResidualError reports more than 75% of
   *     the block as background, when the mean residual is below
   *     m_ResidualToleracne, or when fewer than MIN_TPS_POINT driving points
   *     are available,
   *   - thins the driving points with growing cluster radius until at most
   *     m_MaxTPSPoint remain,
   *   - calls PrepareTPSKernel_Compute_L_InverseBookStein and caches the
   *     resulting TPS_Psi and the block-local control points in TPSData /
   *     ControlPointArray (with per-block offsets and sizes).
   *
   * Pass 2 runs on every call.  For every block with cached data it restores
   * TPS_Psi, rebuilds the moving coordinates from the current residuals,
   * calls UpdateTPSParameter_Bookstein_V2 and evaluates the spline at the
   * block voxels where TPSMaskImg is 0, accumulating the result in a sum
   * field and a hit counter.
   *
   * Finally, for every voxel whose TPSMaskImg value is not 1, OutputDeformFld
   * receives the averaged displacement and TPSOutputImage is set to 1 when at
   * least one block contributed; otherwise OutputDeformFld receives zero.
   * Voxels whose mask value is 1 are left untouched in OutputDeformFld.
   * TPSOutputImage is cleared to 0 at the start.
   *
   * Image_X_Size bounds index component 1, Image_Y_Size component 0 and
   * Image_Z_Size component 2.
   */
  template <class TInputImage, class TOutputImage>
  void 
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::GenerateDeformationbyTPS_Bookstein(ImageAttributePointerType &MdlImg, DeformationFieldPointer PrevDeformFld, DeformationFieldPointer OutputDeformFld, \
                                       DeforamtionVectorArrayType DeformArray, IndexArrayType TemplateDrivingPoint, int Template_DrivingPixel_Num, int Image_X_Size, int Image_Y_Size, int Image_Z_Size,\
                                       InputImagePointer TPSMaskImg, InputImagePointer TPSOutputImage) 
  {
    int i, j, k, l, m2, n, s, t;
    int DrivingPointNum_in_Block, Prev_Num;
    float weight;
    int index;
    int Cluster_Radius;
    vnl_matrix<double> V, Y, PrevFixedCord;
    float Overall_Residual_Error;
    int TotalPixelNum;
    int BlockOffset;
    int BKCovered, BK_Thresh;
    int block_index, NDP_Number, Total_Control_Point_Num, Total_TPS_Data_Num;
    // NOTE(review): this value used to be passed to
    // PrepareTPSKernel_Compute_L_InverseBookStein without ever being
    // assigned (an earlier revision derived it from ptParm->TPS_Lemda).
    // It is initialised to 0 here so the call is deterministic.
    // TODO confirm the intended regularisation weight.
    float Lemda = 0;
    IndexType idx, blockidx, StartPoint, TPSBlockSize;
    DeformationVectorType ZeroDeform, dfm, prev_dfm, curr_dfm;

    BlockOffset = (m_BlockSize - m_OverlapSize)/2;
    TotalPixelNum = m_BlockSize*m_BlockSize*m_BlockSize;
    BK_Thresh = (TotalPixelNum*24)>>5;   // 24/32 = 75% of the block
    for(s=0;s<InputImageDimension;s++)
      {
      TPSBlockSize[s] = m_BlockSize;
      }

    // Allocate the working images.
    typedef itk::Image<int, 3> IntImageType;
    typedef itk::Image<float, 3> FloatImageType;
    typename IntImageType::Pointer DrivingPointImage = IntImageType::New();
    DeforamtionVectorArrayType Residual_Deform;
    typename FloatImageType::Pointer WeightImg = FloatImageType::New();
    DeformationFieldPointer BlockDeform = DeformationFieldType::New();
    typename IntImageType::Pointer BlockIndex = IntImageType::New();
    typename IntImageType::Pointer PrevBlockIndex = IntImageType::New();
    InputImagePointer Block_Mask = InputImageType::New();
    DeformationFieldPointer DeformFld = DeformationFieldType::New();

    DrivingPointImage->CopyInformation(MdlImg);
    DrivingPointImage->SetRegions(MdlImg->GetLargestPossibleRegion());
    DrivingPointImage->Allocate();

    Residual_Deform.clear();
    Residual_Deform.resize(Template_DrivingPixel_Num);

    WeightImg->CopyInformation(MdlImg);
    WeightImg->SetRegions(MdlImg->GetLargestPossibleRegion());
    WeightImg->Allocate();

    typedef typename DeformationFieldType::IndexType LocalIndexType;
    typedef typename DeformationFieldType::SizeType LocalSizeType;
    typedef typename DeformationFieldType::RegionType LocalRegionType;
    LocalIndexType Deform_Start;
    LocalSizeType Deform_Size;
    LocalRegionType Deform_Region;

    for(s=0;s<InputImageDimension;s++)
      {
      Deform_Start[s] = 0;
      Deform_Size[s] = m_BlockSize;
      }
    Deform_Region.SetSize(Deform_Size);
    Deform_Region.SetIndex(Deform_Start);
    BlockDeform->SetRegions(Deform_Region);
    BlockDeform->Allocate();

    DeformFld->CopyInformation(PrevDeformFld);
    DeformFld->SetRegions(PrevDeformFld->GetLargestPossibleRegion());
    DeformFld->Allocate();

    IntImageType::IndexType Int_Start;
    IntImageType::SizeType Int_Size;
    IntImageType::RegionType Int_Region;
    for(s=0;s<InputImageDimension;s++)
      {
      Int_Start[s] = 0;
      Int_Size[s] = m_BlockSize; 
      }
    Int_Region.SetSize(Int_Size);
    Int_Region.SetIndex(Int_Start);
    BlockIndex->SetRegions(Int_Region);
    BlockIndex->Allocate();
    PrevBlockIndex->SetRegions(Int_Region);
    PrevBlockIndex->Allocate();

    IndexType Input_Start;
    SizeType Input_Size;
    RegionType Input_Region;
    for(s=0;s<InputImageDimension;s++)
      {
      Input_Start[s] = 0;
      Input_Size[s] = m_BlockSize;
      }
    Input_Region.SetSize(Input_Size);
    Input_Region.SetIndex(Input_Start);
    Block_Mask->SetRegions(Input_Region);
    Block_Mask->Allocate();

    // One row per possible control point; column 0 is the homogeneous 1.
    FixedCord.set_size(TotalPixelNum, 4);
    MovingCord.set_size(TotalPixelNum, 4);
    FixedCord.set_column(0, 1);
    MovingCord.set_column(0, 1);

    // Define the iterators.
    typedef typename itk::ImageRegionIterator <IntImageType> IntImageIteratorType;
    typedef typename itk::ImageRegionIterator <FloatImageType> FloatImageIteratorType;

    IntImageIteratorType DrivingPointImageIter = IntImageIteratorType(DrivingPointImage, DrivingPointImage->GetLargestPossibleRegion());
    IntImageIteratorType BlockIter = IntImageIteratorType(BlockIndex, BlockIndex->GetLargestPossibleRegion());
    IntImageIteratorType PrevBlockIter = IntImageIteratorType(PrevBlockIndex, PrevBlockIndex->GetLargestPossibleRegion());
    FloatImageIteratorType WeightImgIter = FloatImageIteratorType(WeightImg, WeightImg->GetLargestPossibleRegion());
    DeformationFieldIteratorType DeformFldIter = DeformationFieldIteratorType(DeformFld, DeformFld->GetLargestPossibleRegion());
    InputImageIteratorType TPSOutputImageIter = InputImageIteratorType(TPSOutputImage, TPSOutputImage->GetLargestPossibleRegion());

    for(DrivingPointImageIter.GoToBegin();!DrivingPointImageIter.IsAtEnd();++DrivingPointImageIter)
      {
      DrivingPointImageIter.Set(-1);
      }
    for(WeightImgIter.GoToBegin();!WeightImgIter.IsAtEnd();++WeightImgIter)
      {
      WeightImgIter.Set(0);
      }

    for(s=0;s<InputImageDimension;s++)
      ZeroDeform[s] = 0;
    for(DeformFldIter.GoToBegin();!DeformFldIter.IsAtEnd();++DeformFldIter)
      {
      DeformFldIter.Set(ZeroDeform);
      }
    for(TPSOutputImageIter.GoToBegin();!TPSOutputImageIter.IsAtEnd();++TPSOutputImageIter)
      {
      TPSOutputImageIter.Set(0);
      }

    // Label every driving voxel with its array position and compute the
    // residual that the splines have to reproduce.
    for(t=0;t<Template_DrivingPixel_Num;t++)
      {
      for(s=0;s<InputImageDimension;s++)
        idx[s] = TemplateDrivingPoint[t][s];
      DrivingPointImage->SetPixel(idx, t);
      prev_dfm = PrevDeformFld->GetPixel(idx);
      for(s=0;s<InputImageDimension;s++)
        {
        Residual_Deform[t][s] = DeformArray[t][s] - prev_dfm[s];
        }
      }

    if(m_TPSBufferOccupied == false)
      {
      printf("Initializing...\n");
      ControlPointArray.clear();
      TPSData.clear();
      Element_In_Each_Block.clear();
      StartPoint_In_Each_Block.clear();
      StartPoint_In_TPSData.clear();
      // Calculate the L-1.
      block_index = 0;
      Total_Control_Point_Num=0;
      Total_TPS_Data_Num =0;
      for(k=0;k<Image_Z_Size;k+=m_OverlapSize)
        {
        for(i=0;i<Image_X_Size;i+=m_OverlapSize)
          {
          for(j=0;j<Image_Y_Size;j+=m_OverlapSize)
            {
            /************************************************************************/
            /* Step1 Determine whether need to do TPS in current block              */
            /************************************************************************/
            StartPoint[1] = i - BlockOffset;
            StartPoint[0] = j - BlockOffset;
            StartPoint[2] = k - BlockOffset;

            // Every block, skipped or not, gets exactly one entry in each of
            // the three per-block tables so that block_index addresses them.
            StartPoint_In_Each_Block.push_back(Total_Control_Point_Num);
            StartPoint_In_TPSData.push_back(Total_TPS_Data_Num);

            BKCovered = EstimateResidualError(MdlImg, Residual_Deform, DrivingPointImage, &Overall_Residual_Error, StartPoint, TPSBlockSize);

            if(BKCovered >BK_Thresh )
              {
              Element_In_Each_Block.push_back(0);
              block_index++;
              continue;
              }

            if(Overall_Residual_Error<m_ResidualToleracne)
              {
              printf("(%d %d %d)-->%f   (no need to fit the residual error)", k, i, j, Overall_Residual_Error);
              Element_In_Each_Block.push_back(0);
              block_index++;
              continue;
              }
            /************************************************************************/
            /* Step2 Collect the TPS Data                                           */
            /************************************************************************/

            // Forward this function's own bounds so the clamping here matches
            // the clamping used when the control points are looked up again
            // in the second pass (which asserts that they are driving voxels).
            DrivingPointNum_in_Block = EstimateInitialControlPoint(Residual_Deform, BlockDeform, BlockIndex, FixedCord, MovingCord, StartPoint, \
                                                                   TPSBlockSize, DrivingPointImage, Image_X_Size, Image_Y_Size, Image_Z_Size);

            if(DrivingPointNum_in_Block<MIN_TPS_POINT)
              {
              Element_In_Each_Block.push_back(0);
              block_index++;
              printf("DNP (%d < %d)\n ", DrivingPointNum_in_Block, MIN_TPS_POINT);
              continue;
              }
            printf("Initial Driving Point Number is %d ", DrivingPointNum_in_Block);
            if(DrivingPointNum_in_Block > m_MaxTPSPoint)
              {
              // Cluster the driving points; each attempt restarts from the
              // unclustered labelling with a larger radius.
              Cluster_Radius = 1;
              for(BlockIter.GoToBegin(),PrevBlockIter.GoToBegin();!BlockIter.IsAtEnd();++BlockIter,++PrevBlockIter)
                {
                PrevBlockIter.Set(BlockIter.Get());
                }

              Prev_Num = DrivingPointNum_in_Block;
              PrevFixedCord = FixedCord.extract(Prev_Num, 4, 0, 0);

              while(DrivingPointNum_in_Block>m_MaxTPSPoint)
                {
                DrivingPointNum_in_Block = ClusteringControlPointInBlock(BlockDeform, BlockIndex, Cluster_Radius, FixedCord, MovingCord, \
                                                                         PrevBlockIndex, PrevFixedCord, Prev_Num, StartPoint, TPSBlockSize, DrivingPointImage, Image_X_Size, Image_Y_Size, Image_Z_Size);
                printf("->%d ", DrivingPointNum_in_Block);
                Cluster_Radius++;
                }
              }
            if(DrivingPointNum_in_Block<MIN_TPS_POINT)
              {
              Element_In_Each_Block.push_back(0);
              block_index++;
              printf("DNP (%d < %d)\n ", DrivingPointNum_in_Block, MIN_TPS_POINT);
              continue;
              }
            printf("\n");
            /*End of Step 2*/

            /**********************************************************************************/
            /* Step 3 Compute the  RESIDUAL deformation field which is represented by TPS     */
            /**********************************************************************************/

            V = FixedCord.extract(DrivingPointNum_in_Block, 4, 0, 0);
            PrepareTPSKernel_Compute_L_InverseBookStein(V, DrivingPointNum_in_Block, Lemda, m_BlockSize);

            // The slot for this block does not exist until this push_back;
            // the former assignment to Element_In_Each_Block[block_index]
            // just before it wrote one element past the end of the vector.
            Element_In_Each_Block.push_back(DrivingPointNum_in_Block+4);
            for(m2=0;m2<DrivingPointNum_in_Block;m2++)
              {
              idx[1] = V(m2,1);
              idx[0] = V(m2,2);
              idx[2] = V(m2,3);
              ControlPointArray.push_back(idx);
              Total_Control_Point_Num++;
              }
            for(m2=0;m2<DrivingPointNum_in_Block+4;m2++)
              for(n=0;n<DrivingPointNum_in_Block+4;n++)
                {
                TPSData.push_back(TPS_Psi(m2,n));
                Total_TPS_Data_Num++;
                }
            block_index ++;
            }//end of i,j,k
          }
        }
      // Sentinel entries so that [block_index+1] is valid for the last block.
      StartPoint_In_Each_Block.push_back(Total_Control_Point_Num);
      StartPoint_In_TPSData.push_back(Total_TPS_Data_Num);
      m_TPSBufferOccupied = true;
      }

    block_index = 0;
    printf("Regularize the deformation field with TPS \n");
    for(k=0;k<Image_Z_Size;k+=m_OverlapSize)
      {
      for(i=0;i<Image_X_Size;i+=m_OverlapSize)
        {
        for(j=0;j<Image_Y_Size;j+=m_OverlapSize)
          {
          if(Element_In_Each_Block[block_index] == 0)
            {
            block_index++;
            continue;
            }
          // Read the data from TPSData.
          TPS_Psi.set_size(Element_In_Each_Block[block_index], Element_In_Each_Block[block_index]);
          l = StartPoint_In_TPSData[block_index];
          for(m2=0;m2<Element_In_Each_Block[block_index];m2++)
            for(n=0;n<Element_In_Each_Block[block_index];n++)
              {
              TPS_Psi(m2, n) = TPSData[l];
              l++;
              }

          // m2 walks the global control point list, but V and Y below are
          // extracted from row 0 on, so the coordinates have to be written
          // to block-local rows.
          const int FirstControlPoint = StartPoint_In_Each_Block[block_index];
          for(m2=FirstControlPoint;m2<StartPoint_In_Each_Block[block_index+1];m2++)
            {
            const int row = m2 - FirstControlPoint;
            idx[1] = HammerMax(0, HammerMin(ControlPointArray[m2][1] + i - BlockOffset, Image_X_Size-1));
            idx[0] = HammerMax(0, HammerMin(ControlPointArray[m2][0] + j - BlockOffset, Image_Y_Size-1));
            idx[2] = HammerMax(0, HammerMin(ControlPointArray[m2][2] + k - BlockOffset, Image_Z_Size-1));
            t = DrivingPointImage->GetPixel(idx);
            assert(t>=0);
            FixedCord(row, 1) = ControlPointArray[m2][1];
            FixedCord(row, 2) = ControlPointArray[m2][0];
            FixedCord(row, 3) = ControlPointArray[m2][2];
            MovingCord(row, 1) = Residual_Deform[t][1] + ControlPointArray[m2][1];
            MovingCord(row, 2) = Residual_Deform[t][0] + ControlPointArray[m2][0];
            MovingCord(row, 3) = Residual_Deform[t][2] + ControlPointArray[m2][2];
            }
          V = FixedCord.extract(Element_In_Each_Block[block_index]-4, 4, 0, 0);
          Y = MovingCord.extract(Element_In_Each_Block[block_index]-4, 4, 0, 0);

          UpdateTPSParameter_Bookstein_V2(Y);

          // Copy the mask of this block and count the voxels to evaluate.
          NDP_Number = 0;
          for(l=0;l<m_BlockSize;l++)
            {
            idx[2] = HammerMax(0, HammerMin(k+l-BlockOffset, Image_Z_Size-1));
            blockidx[2] = l;
            for(m2=0;m2<m_BlockSize;m2++)
              {
              idx[1] = HammerMax(0, HammerMin(i+m2-BlockOffset, Image_X_Size-1));
              blockidx[1] = m2;
              for(n=0;n<m_BlockSize;n++)
                {
                idx[0] = HammerMax(0, HammerMin(j+n-BlockOffset, Image_Y_Size-1));
                blockidx[0] = n;
                index = TPSMaskImg->GetPixel(idx);
                Block_Mask->SetPixel(blockidx, index);
                if(index==0)
                  NDP_Number++;
                }
              }
            }
          GenerateBlockDeform_Only_NonDrivingPoints(BlockDeform, Block_Mask, NDP_Number, V, m_BlockSize, Element_In_Each_Block[block_index]-4);
          block_index++;
          /************************************************************************/
          /* Step3 Store the data                                                 */
          /************************************************************************/
          for(l=0;l<m_BlockSize;l++)
            {
            idx[2] = HammerMax(0, HammerMin(Image_Z_Size-1, k+l-BlockOffset));
            blockidx[2] = l;
            for(m2=0;m2<m_BlockSize;m2++)
              {
              idx[1] = HammerMax(0, HammerMin(Image_X_Size-1, i+m2-BlockOffset));
              blockidx[1] = m2;
              for(n=0;n<m_BlockSize;n++)
                {
                idx[0] = HammerMax(0, HammerMin(Image_Y_Size-1, j+n-BlockOffset));
                blockidx[0] = n;
                if(Block_Mask->GetPixel(blockidx)==0)
                  {
                  // Overlapping blocks add up here and are averaged below.
                  curr_dfm = DeformFld->GetPixel(idx);
                  dfm = BlockDeform->GetPixel(blockidx);
                  for(s=0;s<InputImageDimension;s++)
                    curr_dfm[s] += dfm[s];
                  DeformFld->SetPixel(idx, curr_dfm);
                  WeightImg->SetPixel(idx, (WeightImg->GetPixel(idx)+1.0));
                  }
                }
              }
            }//end of step 3
          }//end of i,j,k
        }
      }

    // Average the accumulated displacements into the output field.
    InputImageIteratorType TPSMaskIter = InputImageIteratorType(TPSMaskImg, TPSMaskImg->GetLargestPossibleRegion());
    DeformationFieldIteratorType OutputDeformFldIter = DeformationFieldIteratorType(OutputDeformFld, OutputDeformFld->GetLargestPossibleRegion());
    for(OutputDeformFldIter.GoToBegin(),TPSMaskIter.GoToBegin(),WeightImgIter.GoToBegin(),DeformFldIter.GoToBegin(),TPSOutputImageIter.GoToBegin();!DeformFldIter.IsAtEnd();++WeightImgIter,++DeformFldIter,++TPSOutputImageIter,++TPSMaskIter,++OutputDeformFldIter)
      {
      if(TPSMaskIter.Get()==1)
        continue;
      weight = WeightImgIter.Get();
      if(weight > 0)
        {
        dfm = DeformFldIter.Get();
        for(s=0;s<InputImageDimension;s++)
          dfm[s] = dfm[s]/weight;
        OutputDeformFldIter.Set(dfm);
        TPSOutputImageIter.Set(1);
        }
      else
        {
        OutputDeformFldIter.Set(ZeroDeform);
        }
      }
  }

  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  //FOR INTENSITY HAMMER	
  /**
   * Search the thresholds that select roughly the top CurrentRequirePercent
   * of voxels as driving voxels.
   *
   *   Criteria[0] --> Variance_Up                     (output)
   *   Criteria[1] --> CannyEdge_Up                    (output)
   *   Criteria[2] --> cannyMinimalRequiredEdgeValue   (input)
   *
   * The fraction is measured against the number of voxels whose
   * GetTissueType() is positive.  A 256-bin histogram of GetCSFBackground()
   * over all voxels is scanned from the top bin downwards, lowering
   * Criteria[1] until the accumulated fraction exceeds the request or
   * Criteria[2] is reached.  If that is not enough, a second histogram of
   * GetEdge() over the voxels with GetCSFBackground() < Criteria[1] is
   * scanned the same way to lower Criteria[0]; otherwise Criteria[0] is 255.
   *
   * Both attribute values are used directly as histogram indices and are
   * therefore assumed to lie in [0, 255].
   */
  template <class TInputImage, class TOutputImage>
  void 
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::SearchThresholdForObtainingTopXPercentOfDrivingVoxels(ImageAttributePointerType &AttributeImage, std::vector<float> &Criteria, float CurrentRequirePercent) 
  {
    int Hist[256];
    float VoxNum;
    float totalCanny, totalVariance;
    AttributeImageConstIteratorType it = AttributeImageConstIteratorType(AttributeImage, AttributeImage->GetLargestPossibleRegion());

    VoxNum = 0;
    totalCanny = 0;
    totalVariance = 0;
    for(it.GoToBegin();!it.IsAtEnd();++it)
      {
      if(it.Get().GetTissueType()>0)
        VoxNum += 1.0;
      }
    for(int t=0;t<256;t++)
      Hist[t] = 0;
    /* get the histogram of edge map, for fast determination of edge threshold */
    for(it.GoToBegin();!it.IsAtEnd();++it)
      {
      Hist[it.Get().GetCSFBackground()]++;
      }
    /* get driving points from canny edge map first */
    totalCanny = 0 ;
    for(int m=254;m>=Criteria[2];m--)
      {
      totalCanny += Hist[m+1];
      Criteria[1] = m;
      if(totalCanny/VoxNum>CurrentRequirePercent)
        break;
      }
    /* points in the canny edge map are not enough */
    Criteria[0] = 255 ;
    if( totalCanny/VoxNum < CurrentRequirePercent ) 
      {
      // Clear all 256 bins.  The reset used to stop at bin 254, so bin 255
      // kept its count from the first histogram and was added to
      // totalVariance by the first step of the scan below.
      for(int t=0;t<256;t++)
        Hist[t] = 0;
      for(it.GoToBegin();!it.IsAtEnd();++it)
        {
        if(it.Get().GetCSFBackground()<Criteria[1])
          {
          Hist[it.Get().GetEdge()]++;
          }
        }
      /* get driving points from variance map */
      totalVariance = 0 ;
      for(int m=254; m>=0; m--)
        {
        totalVariance += Hist[m+1];
        Criteria[0] = m;
        if((totalCanny+totalVariance)/VoxNum>CurrentRequirePercent)
          break;
        }
      }
  }

  /*
    The core function to select driving voxels in intensity HAMMER.
  */
  /**
   * Collect the index of every voxel of avPointer whose attribute vector
   * passes IsQualifiedDrivingVoxel_GR_4I( Criteria ).
   *
   * drivingVoxels is cleared and refilled in iteration order.  As a side
   * effect an image with the geometry of avPointer, holding 250 at the
   * selected voxels and 0 elsewhere, is written to "DVImg.hdr" on every call.
   *
   * \return number of driving voxels found.
   */
  template <class TFixedImage, class TMovingImage>
  unsigned int 
  HammerDeformableRegistrationImageFilter<TFixedImage, TMovingImage>
  ::IdentifyDrivingVoxels_GR_4I( ImageAttributePointerType avPointer, std::vector<IndexType> & drivingVoxels, std::vector<float> &Criteria)
  {
    typedef itk::ImageRegionIteratorWithIndex<ImageAttributeType> AttributeIteratorType;
    typedef itk::ImageFileWriter< InputImageType > WriterType;

    AttributeIteratorType scan( avPointer, avPointer->GetLargestPossibleRegion() );

    // Image used only to dump the selection to disk.
    typename InputImageType::Pointer selectionImage = InputImageType::New();
    selectionImage->CopyInformation( avPointer );
    selectionImage->SetRegions( selectionImage->GetLargestPossibleRegion() );
    selectionImage->Allocate();
    selectionImage->FillBuffer( 0 );

    int selectedCount = 0;
    int edgeCount = 0;   // counted for diagnostics only
    drivingVoxels.clear();
    scan.GoToBegin();
    while( !scan.IsAtEnd() )
      {
      AttributeVectorType attribute = scan.Get();
      if( attribute.IsQualifiedDrivingVoxel_GR_4I( Criteria ) )
        {
        drivingVoxels.push_back( scan.GetIndex() );
        ++selectedCount;
        }
      if( attribute.GetEdge() > 0 )
        {
        ++edgeCount;
        }
      ++scan;
      }

    for( unsigned int v = 0; v < drivingVoxels.size(); ++v )
      {
      selectionImage->SetPixel( drivingVoxels[v], 250 );
      }

    typename WriterType::Pointer dump = WriterType::New();
    dump->SetInput( selectionImage );
    dump->SetFileName( "DVImg.hdr" );
    dump->Update();

    return selectedCount;
  }

  /**
   * One in-place smoothing sweep over a deformation field.
   *
   * Each vector is moved half way towards the mean of its neighbours that
   * lie inside the image; neighbours are addressed through entries 1..26 of
   * m_SubvolumeNeighborhood.  Afterwards every component is limited so that
   * index + displacement stays within [0, size-1] of the requested region.
   * The field is updated while it is traversed, so neighbours visited
   * earlier already hold their smoothed value.
   */
  template <class TInputImage, class TOutputImage>
  void
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::SmoothDeformation_OneTime_4I(DeformationFieldPointer DeformFld)
  {
    const float blend = 0.5;
    SizeType extent = DeformFld->GetRequestedRegion().GetSize();
    RegionType wholeRegion = DeformFld->GetLargestPossibleRegion();
    DeformationFieldIteratorType voxel(DeformFld, DeformFld->GetLargestPossibleRegion());

    for(voxel.GoToBegin(); !voxel.IsAtEnd(); ++voxel)
      {
      IndexType here = voxel.GetIndex();
      DeformationVectorType updated = voxel.Get();

      // Mean displacement of the in-image neighbours.
      DeformationVectorType neighbourMean;
      for(int d=0; d<InputImageDimension; d++)
        {
        neighbourMean[d] = 0;
        }
      float neighbourCount = 0;
      for(int nb=1; nb<27; nb++)
        {
        IndexType there;
        for(int d=0; d<InputImageDimension; d++)
          {
          there[d] = here[d] + m_SubvolumeNeighborhood[nb][d];
          }
        if(wholeRegion.IsInside(there))
          {
          DeformationVectorType neighbourDeform = DeformFld->GetPixel(there);
          for(int d=0; d<InputImageDimension; d++)
            {
            neighbourMean[d] += neighbourDeform[d];
            }
          neighbourCount += 1;
          }
        }

      for(int d=0; d<InputImageDimension; d++)
        {
        neighbourMean[d] /= neighbourCount;
        updated[d] += (neighbourMean[d]-updated[d])*blend;

        // Keep the displaced position inside the image.
        if(updated[d]+here[d]<0)
          {
          updated[d] = - here[d];
          }
        if(updated[d]+here[d]>=extent[d]-1)
          {
          updated[d] = extent[d]-1-here[d];
          }
        }
      voxel.Set(updated);
      }
  }

  /**
   * One in-place smoothing sweep whose strength depends on the iteration.
   *
   * Each vector is moved towards the mean of its in-image neighbours
   * (entries 1..26 of m_SubvolumeNeighborhood) by the factor
   * 0.75 - 0.25 * m_IterationRatio^2, then limited so that
   * index + displacement stays within [0, size-1] of the requested region.
   * The edge attribute of the fixed image is consulted, but edge and
   * non-edge voxels currently receive the same factor (the reduced factor
   * for non-edge voxels is disabled).
   */
  template <class TInputImage, class TOutputImage>
  void
  HammerDeformableRegistrationImageFilter<TInputImage, TOutputImage>
  ::EdgePreserveSmoothDeformation_OneTime_4I(ImageAttributePointerType &FixedAttributeImage, DeformationFieldPointer DeformFld)
  {
    SizeType extent = DeformFld->GetRequestedRegion().GetSize();
    DeformationFieldIteratorType voxel(DeformFld, DeformFld->GetLargestPossibleRegion());
    RegionType fixedRegion = FixedAttributeImage->GetLargestPossibleRegion();
    const float blend = 0.75-0.25*m_IterationRatio*m_IterationRatio ;

    for(voxel.GoToBegin(); !voxel.IsAtEnd(); ++voxel)
      {
      IndexType here = voxel.GetIndex();
      DeformationVectorType updated = voxel.Get();
      AttributeVectorType fixedFeature = FixedAttributeImage->GetPixel(here);

      // Mean displacement of the in-image neighbours.
      DeformationVectorType neighbourMean;
      for(int d=0; d<InputImageDimension; d++)
        {
        neighbourMean[d] = 0;
        }
      float neighbourCount = 0;
      for(int nb=1; nb<27; nb++)
        {
        IndexType there;
        for(int d=0; d<InputImageDimension; d++)
          {
          there[d] = here[d] + m_SubvolumeNeighborhood[nb][d];
          }
        if(fixedRegion.IsInside(there))
          {
          DeformationVectorType neighbourDeform = DeformFld->GetPixel(there);
          for(int d=0; d<InputImageDimension; d++)
            {
            neighbourMean[d] += neighbourDeform[d];
            }
          neighbourCount += 1;
          }
        }
      for(int d=0; d<InputImageDimension; d++)
        {
        neighbourMean[d] /= neighbourCount;
        }

      if(fixedFeature.GetEdge()>0 )
        {
        for(int d=0; d<InputImageDimension; d++)
          {
          updated[d] += (neighbourMean[d]-updated[d])*blend;
          }
        }
      else
        {
        // Same factor as on edges; the halved variant is switched off.
        for(int d=0; d<InputImageDimension; d++)
          {
          updated[d] += (neighbourMean[d]-updated[d])*blend;
          }
        }

      // Keep the displaced position inside the image.
      for(int d=0; d<InputImageDimension; d++)
        {
        if(updated[d]+here[d]<0)
          {
          updated[d] = - here[d];
          }
        if(updated[d]+here[d]>=extent[d]-1)
          {
          updated[d] = extent[d]-1-here[d];
          }
        }
      voxel.Set(updated);
      }
  }

} // end namespace itk


#endif
