package uploadutils;

# $Id: uploadutils.pm,v 1.1 2003/12/04 20:03:45 bozyurt Exp $

use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION);

use strict;
use Exporter;

$VERSION = 1.00; 
@ISA = qw(Exporter);

@EXPORT = qw();
@EXPORT_OK = qw(); 
%EXPORT_TAGS = ();

use File::Path;
use File::Spec;
use File::Find;
use Carp;

# package globals

my @dicom_keys=qw(PatientsSex PatientsAge Modality Manufacturer ManufacturersModelName MRAcquisitionType ScanningSequence SequenceVariant SequenceName SliceThickness EchoTime RepetitionTime InversionTime MagneticFieldStrength SpacingBetweenSlices FlipAngle ImageOrientationPatient Rows Columns ImagesInAcquisition PixelSpacing SeriesInstanceUID SeriesNumber);



sub create_srb_collection_path {
    my($srb_root, $birn_ID, $visit, $study, $series) = @_;
    # will change most probably
    my $p = "$srb_root/$birn_ID/";
    $p .= "$visit/$study/RawData/$series"; 
    return $p;
}

sub create_container_name {
   my($birn_ID, $visit, $study, $series) = @_;
   $series =~ s/\./_/;
   my $cname = "${birn_ID}_${visit}_${study}_${series}";
   return $cname;
}


sub padnumbers {
    my ($number, $nodigits, $padchar) = @_;
    my $len = length $number;
    return $number unless ($len != $nodigits);
    my $num = "";
    for(my $i= 0; $i < $nodigits - $len; $i++) {
      $num .= $padchar;
    }
    $num .= $number;
    return $num;  
}


sub read_link_list {
    my $fname = $_[0];
    my %link_map = ();
    open(FP,"$fname") or croak("Cannot open $fname:$!");
    while(<FP>) {
	chomp;
        s/^\s+//;
        s/\s+$//;
        next if (length($_) == 0); 
	my ($cid, $bid) = split(/,/, $_,2);
        $link_map{$cid} = $bid;
    }
    close(FP);
    return %link_map;
}

sub create_link_list {
    my ($clin_id_fname, $link_table_fname, $link_list_fname) = @_;
    my @cids = ();
    open(FP, "$clin_id_fname") or croak("Cannot open $clin_id_fname:$!");
    while(<FP>) {
	chomp;
	s/^\s+//;
	s/\s+$//;
        next if (length($_) == 0);
        push @cids, $_;
    }
    close(FP);
    
    my %link_map = ();
    foreach my $cid (@cids) {
	my $cmd = " birnid_gen.sh -find -c $cid -l $link_table_fname";
        open(PROC,"$cmd |") or croak("Cannot open $cmd:$!");
	my $line = <PROC>;
	chomp $line;
        if ( $line =~ /Birn ID=([^,]+)/ ) {
          $link_map{$cid} = $1;     
	}
	close(PROC);
    }
    open(FP,">$link_list_fname") or croak("Cannot create $link_list_fname:$!");
    while ( my ($key, $value) = each %link_map ) {
        print FP "$key,$value\n";
    }
    close(FP);
}

sub create_BIRN_IDs {
    my ($prefix, $link_table_fname, $clin_ids_fname) = @_;
    my $cmd = "birnid_gen.sh -create -p $prefix -l $link_table_fname -cf $clin_ids_fname";
    my $status = system($cmd);
    croak("An error occured during BIRN ID creation!\n") unless ($status == 0);
}


# for UCSD retrospective data ONLY!!!

sub apply_BIRN_IDs {
    my ($link_filename, $root_dir) = @_;
    croak("$link_filename does not exists!") unless (-e $link_filename);
    croak("Directory $root_dir does not exists!") unless (-d $root_dir);

    my %link_map =  read_link_list( $link_filename );
    
    my @slist = dirutils::get_series($root_dir);
    
    my @slist2 = grep(/((bn)?\d\d\d\d)$/, @slist);
    print(join("\n", @slist2),"\n");
    foreach my $series_dir (@slist2) {
	# for retrospective data only
	if ($series_dir =~ /((bn)?\d\d\d\d)$/ ) {
	    my $new_dir = $series_dir;
	    my $cid = $1;
	    my $bid = $link_map{$cid};
	    $new_dir =~ s/(bn)*\d{4,4}/$bid/;
	    #print("new_dir=$new_dir\n");
	    my @files = <$series_dir/*.mr>;
	    foreach my $file (@files) {
		my $old_file = $file;
		my $len = 8;
		if ( $file =~ /(\d+)\.mr/ ) {
		    $len = length($1);
		}
		if ($len == 8) {
		    $file =~ s/((bn)?\d{5,5})(\d+\.mr)/$bid\_$3/;
		} else {
		    $file =~ s/((bn)?\d{4,4})(\d+\.mr)/$bid\_$3/;
		} 
		my $cmd ="mv $old_file $file";
		my $status = system($cmd);
		#print("$cmd\n");
	    }
	    my $cmd = "mv $series_dir $new_dir";
	    system($cmd);
	    print("$cmd\n");
	}
    }    
}

sub convert_to_dicom {
    my ($img_root_dir, $out_dir) = @_;
    croak("Directory '$img_root_dir' does not exists!") unless (-d $img_root_dir);
    croak("Directory '$out_dir' does not exists!") unless (-d $out_dir);
    my $cmd = "convert_to_DICOM.pl -r $img_root_dir -o $out_dir";
    my $status = system($cmd);
    croak("An error occured during dicom conversion! (status: " . ($status / 256) . ")\n") 
         unless ($status == 0);
}

sub get_dicom_header_values {
    my ($fname, @fields) = @_;
    my %value_map = ();
    my %field_map = map { $_ => $_ } @fields;
    open(PROC,"dcmdump $fname | ") or croak("Cannot start process:$!");
    while(<PROC>) {
	chomp;
        my $key;
        if ( /(\w+)$/) {
	    $key = $1;
	}
        next unless ( defined($key) && exists $field_map{$key} );
        my $r;
	if ( /\[([^\[]+)\]/) {
	    $r = $1;
	} elsif ( /US\s+(\d+)/) {
	    $r = $1; 
	}
        $value_map{$key} = $r;
    }

    close(PROC);
    return %value_map;
}

sub create_DICOM_UID {
   my ($ClinTrialProtocolID, $ClinTrialSiteID, $ClinTrialSubjectID, $StudyID, $SeriesNum) = @_;    
   my $uid = "1.3.6.1.4.1.16341.";
   $uid .= strip_leading_zeros($ClinTrialProtocolID);
   $uid .= "." . strip_leading_zeros( $ClinTrialSiteID);
   $uid .= "." . strip_leading_zeros($ClinTrialSubjectID);
   $uid .= "." . strip_leading_zeros($StudyID);
   $uid .= "." . strip_leading_zeros($SeriesNum);

   return $uid;
}

sub create_dicom_SOPInstanceUID {
    my ($species, $proj_ID, $birn_ID, $inst_ID, $visit_ID, $mri_type, $image_data_type, $img_file) = @_;
    my @fields = qw(StudyID InstanceNumber SeriesNumber );
    my $uid = "1.3.6.1.4.1.16341.";
    $uid .= strip_leading_zeros($species);
 
    my %value_map = get_dicom_header_values($img_file, @fields);
    $uid .= "." . strip_leading_zeros($proj_ID);
    my $bid; 
    if ( $birn_ID =~ /(\d{8,8}$)/) {
	$bid = $1;
    }
    my $subj_id = strip_leading_zeros($inst_ID) . "." . $bid;
    $uid .= "." . $subj_id;
    $uid .= "." . strip_leading_zeros( $value_map{StudyID});
    $uid .= "." . strip_leading_zeros( $visit_ID );
    $uid .= "." . strip_leading_zeros( $mri_type );
    $uid .= "." . strip_leading_zeros( $value_map{SeriesNumber});
    $uid .= "." . strip_leading_zeros( $value_map{InstanceNumber});
    return $uid;
}

# robust CSV parser taking into account embedded commas, quotations
sub tokenize_csv {
	my ($line) = @_;
	my @toks = ();
	$_ = $line;

	while (/(\")?([^,|^\"]*)([,|\"])?\s*/g ) {
	    my ($first, $sec, $third) = ($1,$2, $3);
	    if(defined($first)) {
		if ($third eq "," ) {
		    my $x = "$sec$third";
		    if (/([^\"]+)\"/g) {
			$x .= $1;
			# print "x=$x\n";
			push @toks, $x;
		    }
		} else {
		    push @toks, $sec;
		}
	    } else {
		push @toks, $sec;
	    }
	}
        pop @toks; # last one is spurious
	return @toks;
}


sub load_clinical_metadata {
    my ($fname) = @_;
    open(FP, $fname) or croak("Cannot open file $fname:$!");
    my %name2idx_map = ( "BirnID" => 1, 
        "InstitutionID" => 2, "ProjectID" => 3, "VisitID" => 4,
	"ScanType" => 5, "StudyID" => 6, "Gender" => 7, "Age" => 8,
        "AgeQualifier" => 9,
	"MMSE" => 10, "Diagnosis" => 11, "Weighting" => 12,
        "StationName" => 13 ,"FacialAnonym" => 14 );  		 
    my $data_format = "Series Path, BirnID,InstitutionID, ProjectID," .
        "VisitID, ScanType,StudyID, Gender, Age, AgeQualifier, MMSE," .
        "Diagnosis,Weighting, StationName,FacialAnonym";
  
    my %meta_map = ();
    while(<FP>) {
	chomp;
        s/^\s+//;
	s/\s+$//;
        next if (length $_ == 0);
        next if ( /^#/ );
        my $line = $_;
        # my @toks = split(/,/);
        my @toks = tokenize_csv($line);
        print "tokens=" . join(",",@toks) . "\n";
        if ( scalar @toks != 15) {
	    warn("Needs to have 15 comma separated tokens:\nFormat:\n$data_format\n---\n$line\n");
	    next;
	}

        # series_path, BirnID, InstitutionID,
        # Project ID, Visit ID
        # Scan Type, Image Data Type (not used)
        # StudyID
        # Gender, Age,AgeQualifier, MMSE, Diagnosis, 
        # Weighting,StationName, FacialAnonym 
        my %cmeta_map = ();
        $cmeta_map{BirnID} = $toks[1];
        $cmeta_map{InstitutionID} = $toks[2];
        $cmeta_map{ProjectID} = $toks[3];
        $cmeta_map{VisitID} = $toks[4];
        # $cmeta_map{Species} = $toks[5];
        $cmeta_map{ScanType} = $toks[5];
        $cmeta_map{StudyID} = $toks[6]; 

        $cmeta_map{Gender} = $toks[7];
	$cmeta_map{Age} = $toks[8];
        $cmeta_map{AgeQualifier} = $toks[9]; 
	$cmeta_map{MMSE} = $toks[10];
        $cmeta_map{Diagnosis} = $toks[11];
        $cmeta_map{Weighting} = $toks[12];
        $cmeta_map{StationName} = $toks[13];
        $cmeta_map{FacialAnonym} = $toks[14];
        $meta_map{$toks[0]} = \%cmeta_map;
    } 
    close(FP);
    return %meta_map;
}



sub strip_leading_zeros {
    my ($num) = @_;
    $num =~ s/^0+//;
    return $num;
}

sub get_dicom_fields {
    my ($series_path) = @_;
    my @files = <$series_path/*.dcm>;
    croak("No DICOM files in $series_path!") unless (scalar @files > 0);
    
    my %dcm_map = dcmutils::get_dicom_header_values($files[0], @dicom_keys );
    return %dcm_map;
}

# creates the SRB collection names and returns in an array of the following order
# Subject Visit Study Series
sub prep_collection_names {
    my ($dcm_map_ref, $cmeta_map_ref) = @_;
    my %cmeta_map = %{$cmeta_map_ref};
    my %dcm_map = %{$dcm_map_ref};
    my $visit_col = "Visit_" . uploadutils::padnumbers($cmeta_map{VisitID},3, '0');
    my $study_col = "Study_" .  uploadutils::padnumbers($cmeta_map{StudyID}, 4, '0');
    my $series_col = uploadutils::padnumbers($dcm_map{SeriesNumber},3,'0') . ".ser";    
    my $subject_col =  $cmeta_map{BirnID};
    if ( $subject_col =~ /(\d\d\d\d\d\d\d\d)$/ ) {
        $subject_col = $1;
    }
    $subject_col =  uploadutils::padnumbers($cmeta_map{InstitutionID},4,'0') . $subject_col;
    my @arr = ( $subject_col, $visit_col, $study_col, $series_col);
    return @arr; 
}


# meta data preparation routines

sub get_acquisition_plane {
  my ($img_orient) = @_;
  my @dc = split(/\\/,$img_orient);
  if ($dc[1] == 1 && $dc[5] == -1 &&
      $dc[0] == 0 && $dc[2] == 0 && $dc[3] == 0 && $dc[3]) 
  {
     return "SAG";                                  
  }
  # TODO for axial and coronal

}

sub get_acq_plane {
    my ($series_path, $img_orient) = @_;
    my @dc = split(/\\/,$img_orient);
    my @normal_vec = ();
    
    # DICOM coordinates are in lps, so correct it here
    $dc[0] *= -1; $dc[1] *= -1; $dc[3] *= -1; $dc[4] *= -1;
    # Rr 0 , Ra 1, Rs 2, Cr 3, Ca 4, Cs 5
    # get the first and last images in series
    # my @origs_ref = get_image_positions($series_path);
    
    $normal_vec[0] = $dc[1] * $dc[5] - $dc[2] * $dc[4]; # Nr
    $normal_vec[1] = $dc[2] * $dc[3] - $dc[0] * $dc[5]; # Na
    $normal_vec[2] = $dc[0] * $dc[3] - $dc[1] * $dc[3]; # Ns
    my $orient = "AXI";
    if ( abs($dc[0]) >  abs($dc[3]) && abs($dc[0]) > abs($normal_vec[0]) ) {
        unless ( abs($dc[4]) > abs($dc[1]) && abs($dc[4]) > abs($normal_vec[1]) ) {
	    $orient = "COR";
        }  
    } elsif ( abs($dc[3]) > abs($dc[0]) && abs($dc[3]) > abs($normal_vec[0]) ){
        unless(abs($dc[1]) > abs($dc[4]) && abs($dc[1]) > abs($normal_vec[1]) ) {
            $orient = "COR";
        } 
    } else {
        $orient = "SAG";
    }
    return $orient;
}


sub get_image_positions {
    my ($series_path) = @_;
    my @files = <$series_path/*.dcm>;
    my $no_files = scalar @files;
    my @fields = qw(ImagePositionPatient InstanceNumber);
    my @ff = grep(/001\.dcm$/, @files);
    my @lf = grep(/$no_files\.dcm/, @files);
    my %vmap_first = dcmutils::get_dicom_header_values($ff[0], @fields);
    my %vmap_last = dcmutils::get_dicom_header_values($lf[0], @fields);
    if ($vmap_first{InstanceNumber} != 1 || $vmap_last{InstanceNumber} != $no_files) {
      my $first = undef;
      my $last = undef;
      # needs to check every image file to find out the first and the last image
  } else {
     my @first_origin = split(/\\/, $vmap_first{ImagePositionPatient});
     my @last_origin = split(/\\/, $vmap_last{ImagePositionPatient});
     return (\@first_origin, \@last_origin);  
  }
}

sub get_pixel_spacing {
    my ($pixel_spacing) = @_;
    my @sp = split(/\\/,$pixel_spacing,2);
    return @sp;
}

sub get_age {
  my ($age_val) = @_;
  my $age;
  my $qualifier;
  if ( $age_val =~ /(\d+)([D|W|M|Y])/ ) {
     $qualifier = $2;
     $age = $1;
     $age="90+" if ($age >= 90 && $qualifier eq "Y");
  }
  return $age;
}

sub get_age_qualifier {
  my ($age_val) = @_;
  my $qualifier;
  if ( $age_val =~ /([D|W|M|Y])/ ) {
    $qualifier = $1;
  }
  return $qualifier;
}


sub prep_metadata_config_file {
    my($meta_map_ref, $conf_fname, $srb_root) = @_;
    # my %meta_map = uploadutils::load_clinical_metadata($meta_fname);
    my %meta_map = %{$meta_map_ref};
    open(OUT,">$conf_fname") or croak("Cannot create file $conf_fname:$!");
    foreach my $key ( sort keys(%meta_map) ) {
	my $cmeta_ref = $meta_map{$key};
        my $data = add_metadata_for_series($key, $srb_root, %{$cmeta_ref});
        print $data;
        print OUT $data;
    }
    close(OUT);
}



sub add_metadata_for_series {
    my ($series_path, $srb_root, %cmeta_map) = @_;
    my $data = "";
    my @files = <$series_path/*.dcm>;
    croak("No DICOM files in $series_path!") unless (scalar @files > 0);
    
    # 
    my %dcm_map = dcmutils::get_dicom_header_values($files[0], @dicom_keys );
    print("ImagesInAcquisition=$dcm_map{ImagesInAcquisition}\n");
    my $col = $series_path;
    # $col =~ s/^$root//;
    $srb_root =~ s/\/$//; 

    my $visit_col = "Visit_" . uploadutils::padnumbers($cmeta_map{VisitID},3, '0');
    my $study_col = "Study_" .  uploadutils::padnumbers($cmeta_map{StudyID}, 4, '0');
    my $series_col = uploadutils::padnumbers($dcm_map{SeriesNumber},3,'0') . ".ser";    
    my $subject_col =  $cmeta_map{BirnID};
    if ( $subject_col =~ /(\d\d\d\d\d\d\d\d)$/ ) {
        $subject_col = $1;
    }
    $subject_col =  uploadutils::padnumbers($cmeta_map{InstitutionID},4,'0') . $subject_col;

    # for subject

    my $subject = "$srb_root/$subject_col";
    $data .= "collection=$subject\n";
    $data .= "BIRN_ID=$cmeta_map{BirnID}\nGender=$cmeta_map{Gender}\n";

    # for visit

    my $visit = "$srb_root/$subject_col/$visit_col";
    $data .= "collection=$visit\n";
    my $visit_num = uploadutils::padnumbers($cmeta_map{VisitID},3, '0');
    $visit_num =~ s/^Visit\_ //;      
    $data .= "VisitNumber=$visit_num\nAge=";
    my $age =  $cmeta_map{Age};
    if ($age >= 90 &&  $cmeta_map{AgeQualifier} eq "Y")  {
	$data .= "90+\n";
    } else {
	$data .= "$age\n"
        } 
    $data .= "Diagnosis=$cmeta_map{Diagnosis}\nAgeQualifier=$cmeta_map{AgeQualifier}\n";
    $data .= "MMSE=$cmeta_map{MMSE}\n";


    # for studies
    my $study = "$srb_root/$subject_col/$visit_col/$study_col";
    $data .= "collection=$study\n"; 
    $data .= "ScanType=$cmeta_map{ScanType}\n";
    $data .= "StudyID=".  uploadutils::padnumbers($cmeta_map{StudyID},4,'0') . "\n";
    $data .= "Manufacturer=$dcm_map{Manufacturer}\n";
    $data .= "Model=$dcm_map{Model}\n";
    $data .= "StationName=$cmeta_map{StationName}\n";    
    $data .= "InstitutionID=$cmeta_map{InstitutionID}\n";
    

    # for series
    my $series = "$srb_root/$subject_col/$visit_col/$study_col/RawData/$series_col";
    $data .= "collection=$series\n"; 	
    $data .= "SeriesInstanceUID=$dcm_map{SeriesInstanceUID}\n";
    $data .= "SeriesNumber=$dcm_map{SeriesNumber}\nSequence=$dcm_map{ScanningSequence}\n";
    $data .= "SequenceVariant=$dcm_map{SequenceVariant}\n";
    $data .= "SequenceName=$dcm_map{SequenceName}\n";
    $data .= "MRAcquistionType=$dcm_map{MRAcquisitionType}\n";
    $data .= "AcqPlane=" . 
       get_acq_plane($series_path, $dcm_map{ImageOrientationPatient}) . "\n";
    $data .= "Modality=$dcm_map{Modality}\n";
    $data .= "Weighting= $cmeta_map{Weighting}\n";
    $data .= "Matrix_X=$dcm_map{Rows}\nMatrix_Y=$dcm_map{Columns}\n";
    $data .= "Matrix_Z=$dcm_map{ImagesInAcquisition}\n";
    my @sp = get_pixel_spacing( $dcm_map{PixelSpacing} );
    $data .= "PixelSpacing_X=$sp[0]\nPixelSpacing_Y=$sp[1]\n";
    $data .= "SliceThickness=$dcm_map{SliceThickness}\nIntersliceSpace=$dcm_map{SpacingBetweenSlices}\n";
    $data .= "FlipAngle=$dcm_map{FlipAngle}\nTE=$dcm_map{EchoTime}\n";
    $data .= "TR=$dcm_map{RepetitionTime}\n";
    $data .=  "TI=$dcm_map{InversionTime}\n";
    $data .= "FieldStrength=$dcm_map{MagneticFieldStrength}\n";
    $data .= "FacialAnonym=$cmeta_map{FacialAnonym}\n";
    
    return $data;
}


sub rename_dicom_files {
    my (@series_path_list) = @_;
    foreach my $sp (@series_path_list) {
        $sp =~ s/\/$//g; 
	my @files = <$sp/*.dcm>;
        foreach my $file (@files) {
            my %value_map = dcmutils::get_dicom_header_values($file, qw(InstanceNumber) );
            croak("InstanceNumber is not available in DICOM file:$file\n") 
		unless (exists($value_map{InstanceNumber}) );
            my $new_fname = "$sp/" . padnumbers( $value_map{InstanceNumber},3, '0') . ".dcm";
            system("mv $file $new_fname");
	}
    }

}


sub anonymize_headers {
    my ($in_dcm_root, $do_backup) = @_;
    my %anon_map = ();   
    my @lst = dirutils::get_dicom_series($in_dcm_root);
    foreach my $dir (@lst) { 
	my $backup_dir = "${dir}.backup";
	if ( $do_backup == 1 ) {
	    
	    if (-e $backup_dir) {
		warn("Backup dir <$backup_dir> already exists!\nSkipping the anonymization for $dir\n");
		$anon_map{$dir}++;
		next; 
	    } 
	}
	my $outpath = "${dir}.tmp";
        File::Path::mkpath("$outpath") unless (-e $outpath); 
	
	my $rc = system("dcanon -nostrip $dir $outpath");
	if ($rc == 0) {           
            if ($do_backup) {
		system("mv  $dir $backup_dir");
		if (-e $dir) {
		    warn("Cannot rename $dir to $backup_dir! Skipping this series!");
		    next;
		} 
		if (-e $backup_dir) { 
		    system("mv $outpath $dir");
		    unless (-e $dir) {
			warn("Cannot rename $outpath to $dir! Skipping this series!");
			next;
		    }
		    $anon_map{$dir}++;
		} else {
                    warn("No backup dir '$backup_dir'! Skipping this series!");
		}
            } else {
	      File::Path::rmtree($dir) if (-e $dir);
                system("mv $outpath $dir");
		unless (-e $dir) {
		    warn("Cannot rename $outpath to $dir! Skipping this series!");
		    next;
		}
                $anon_map{$dir}++;
            } 	    
	} else {
	    warn("Anonymization failed for $dir! Skipping to the next.");
	}	
    }
    
    return %anon_map;
}



1;

=head1 NAME

uploadutils.pm

=cut

=head1 SYNOPSIS

    read_link_list($fname);
    create_link_list($clin_id_fname, $link_table_fname, $link_list_fname);
    create_dicom_SOPInstanceUID($species, $proj_ID, $birn_ID, $inst_ID, 
                    $visit_ID, $mri_type, $image_data_type, $img_file);
    apply_BIRN_IDs($link_filename, $root_dir);

=cut

=head1 DESCRIPTION

Common routines for data preparation for upload.

=cut

=head1 AUTHOR

Written by I. Burak Ozyurt

=cut

=head1 NOTES

The following external programs/scripts are used by uploadutils.pm

=over 4

=item I<birnid_gen.sh>

for BIRN ID generation and querying

=item I<sgntodc2>

=item I<gentodc>

=back

=cut
