## This script is run first. This simulates Monozygotic and Dyzygotic/Sibling quantitative phenotype data, and also runs OpenMX. 

################Libraries required########################
require(OpenMx)#need for running Openmx
require(psych)
require(MASS) #need for multivariate normal function in R (mvrnorm)
require(reshape2)#need for reshaping data between SOLAR and Openmx

####Specify directory and dataset folder and file names#########
##if not running R in same location that SOLAR will be run, need to have R and SOLAR directories. The SOLAR directories are named _linux here##

mainDir <- "/data/Solar_MEK/" 
mainDir_linux <- "/data/Solar_MEK/" #when running on linux change A to /labs/twells and change back when running on comput
subDir_linux<-paste(mainDir_linux,'LiveDemo/',sep="")
subDirmz_linux<-paste(subDir_linux,'twins4/',sep="")
subDir<-paste(mainDir,'LiveDemo/',sep="")
subDirmz<-paste(subDir,'twins4/',sep="")


## Make directories that were specified above if they don't exist already ##

if (!file.exists(subDir)){
  dir.create(file.path(subDir))
}
if (!file.exists(subDirmz)){
  dir.create(file.path(subDirmz))
}

#############Set iterations, number of pairs, A and C###########
max=60 #max number of pairs
number=c(20,max) #number of pairs
set.seed=number #this sets the seed for the mvnorm functions
iterations=10 #number of phenotype files created for each simulation
a2_percent=c(10,50) #this is standardized standard deviation attributable to additive genetics (A/(A+C+E))
c2_percent=0 #this is standardized standard deviation attributable to common environment (set to 0 here)(A/(A+C+E)

#######################Loop simulate twin pedigrees, phenotypes, run OpenMX, make SOLAR commands#############################

for (num in number){
  #looping through pair numbers 
  
  ##########cat commands are making SOLAR command file at mztwin_solar.txt. This file will have to be run to run SOLAR using the command solar> mztwin_solar.txt###########
  cat(paste('load pedigree ',subDirmz_linux[1],'pair',num,"/ped",num,".ped",sep=""), file=paste(subDirmz[1],'mztwin_solar.txt',sep=""),sep="\n",append=TRUE)
  subDirmzpair=paste(subDirmz,'pair',num,sep="")
  subDirmzpair_linux=paste(subDirmz_linux,'pair',num,sep="")
  if (!file.exists(subDirmzpair)){
    dir.create(file.path(subDirmzpair))
  }
  for (A2 in 1:length(a2_percent)){
    #looping through simulated additive genetics standardized variances
    A<-a2_percent[A2]
    subDirmzA<-paste(subDirmzpair,'/A_',A,sep="")
    subDirmzA_linux<-paste(subDirmzpair_linux,'/A_',A,sep="")
    
    if (!file.exists(subDirmzA)){
      dir.create(file.path(subDirmzA))
    }
    
    for (C2 in 1:length(c2_percent)){
      #looping through simulated common environment standardized variances
      C<-c2_percent[C2]
      subDirmzAC<-paste(subDirmzA,'/A_',A,'_C_',C,sep="")
      subDirmzAC_linux<-paste(subDirmzA_linux,'/A_',A,'_C_',C,sep="")
      
      if (!file.exists(subDirmzAC)){
        dir.create(file.path(subDirmzAC))
      }
      
      #creating a file to output the openmx heritability estimates
      H2r_open<-paste(subDirmzAC,'/h2r_twins_openmx.csv',sep="")
      
      if (!file.exists(H2r_open)){
        file.create(file.path(H2r_open))
      }			
      #creating a file to output the openmx beta estimates
      Bage_open<-paste(subDirmzAC,'/bage_twins_openmx.csv',sep="")
      
      if (!file.exists(Bage_open)){
        file.create(file.path(Bage_open))
      }			
      
      #creating a file to output the solar heritability estimates
      H2r_solar<-paste(subDirmzAC,'/h2r_twins_solar.csv',sep="")
      
      if (!file.exists(H2r_solar)){
        file.create(file.path(H2r_solar))
      }			
      #creating a file to output the solar beta estimates
      Bage_solar<-paste(subDirmzAC,'/bage_twins_solar.csv',sep="")
      
      if (!file.exists(Bage_solar)){
        file.create(file.path(Bage_solar))
      }			
      
      h2r_open=c()
      bage_open=c()
      
      for (iter in 1:iterations){
        outdirMZ<-paste(subDirmzAC,'/gen',iter,"out/",sep="")
        outdirMZ_linux<-paste(subDirmzAC_linux,'/gen',iter,"out/",sep="")
        
        if (!file.exists(outdirMZ)){
          dir.create(file.path(outdirMZ))
        }
        
        #adding to SOLAR commands
        cat(paste("load phenotypes ",subDirmzAC_linux,'/gen',iter,'.csv',sep=""), file=paste(subDirmz[1],'mztwin_solar.txt',sep=""),sep="\n",append=TRUE)
        cat("model new", file=paste(subDirmz[1],'mztwin_solar.txt',sep=""),sep="\n",append=TRUE)
        cat(paste("trait ",'kindgen',iter,sep=""), file=paste(subDirmz[1],'mztwin_solar.txt',sep=""),sep="\n",append=TRUE)
        cat("covar age", file=paste(subDirmz[1],'mztwin_solar.txt',sep=""),sep="\n",append=TRUE)
        cat(paste("outdir ",outdirMZ_linux,sep=""), file=paste(subDirmz[1],'mztwin_solar.txt',sep=""),sep="\n",append=TRUE)
        cat("polygenic", file=paste(subDirmz[1],'mztwin_solar.txt',sep=""),sep="\n",append=TRUE)
        
        
        #Simulated phenotype data with certain A,C, and E characteristics
        a2<-A/100     #Additive genetic variance component (a squared) in decimal format 
        c2<-C/100     #Common environment variance component (c squared) in decimal format 
        e2<-1-(a2+c2)     #Specific environment variance component (e squared)in decimal format 
        #setting up relatedness matrix for MZ and DZ twins			
        relatednessMZ=a2*matrix(c(1,1,1,1),2,2) 
        relatednessDZ=a2*matrix(c(1,1/2,1/2,1),2,2)
        diagon=diag(2) 
        diagon[1,2]=1
        diagon[2,1]=1
        household=c2*diagon 
        error=e2*diag(2)
        # using mvrnorm to create simulated phenotypes
        DataMZa <- mvrnorm (num, c(0,0), relatednessMZ) 
        DataMZc <- mvrnorm (num, c(0,0), household) 
	DataMZe <- mvrnorm (num, c(0,0), error) 
	DataDZa <- mvrnorm (num, c(0,0), relatednessDZ)
	DataDZc <- mvrnorm (num, c(0,0), household)
	DataDZe <- mvrnorm (num, c(0,0), error)
	DataMZ1 <- DataMZa + DataMZc + DataMZe
        DataDZ1 <- DataDZa + DataDZc + DataDZe
        
        # variable names (t1 and t2) corresponding to twin 1 and twin2
        selVars <- c('t1','t2')
        colnames(DataMZ1) <- selVars
        colnames(DataDZ1) <- selVars
        
        #age data simulated
        ageMZ<-as.matrix(round(100*runif(n=nrow(DataMZ1),0,1)),nrow(DataMZ1),1)
        ageDZ<-as.matrix(round(100*runif(n=nrow(DataDZ1),0,1)),nrow(DataDZ1),1)
        DataMZ2<-cbind((DataMZ1 +.005*cbind(ageMZ,ageMZ)),ageMZ,ageMZ)
        DataDZ2 <-cbind((DataDZ1 +.005*cbind(ageDZ,ageDZ)),ageDZ,ageDZ)
        colnames(DataMZ2) <- c('t1','t2','age1','age2')
        colnames(DataDZ2) <- c('t1','t2','age1','age2')
        selVars <- c('t1','t2')
        ################################################################
        
        #phenotypes and covariates simulated above#
        
        ####################OpenMX Script ACE model#####################
        twinACEModel <- mxModel("twinACE",
                                # Matrices X, Y, and Z to store a, c, and e path coefficients
                                mxMatrix( type="Full", nrow=1, ncol=1, free=TRUE, values=.2, label="a", name="X" ),
                                mxMatrix( type="Full", nrow=1, ncol=1, free=TRUE, values=.2, label="c", name="Y" ),
                                mxMatrix( type="Full", nrow=1, ncol=1, free=TRUE, values=.2, label="e", name="Z" ),
                                # Matrices A, C, and E compute variance components
                                mxAlgebra( expression=X %*% t(X), name="A" ),
                                mxAlgebra( expression=Y %*% t(Y), name="C" ),
                                mxAlgebra( expression=Z %*% t(Z), name="E" ),
                                mxMatrix( type="Full", nrow=1, ncol=2, free=TRUE, values= 20, label="mean", name="expMean" ),
                                # Declare a matrix for the definition variable regression parameters, called beta
                                mxMatrix( type="Full", nrow=1, ncol=1, free=TRUE, values= 0, label=c("betaAge"), name="beta"),
                                # Algebra for expected variance/covariance matrix in MZ
                                mxAlgebra(
                                  expression= rbind  (cbind(A+C+E , A+C),
                                                      cbind(A+C   , A+C+E) ), 
                                  name="expCovMZ"),
                                # Algebra for expected variance/covariance matrix in DZ
                                # note use of 0.5, converted to 1*1 matrix
                                mxAlgebra(
                                  expression= rbind  (cbind(A+C+E     , 0.5%x%A+C),
                                                      cbind(0.5%x%A+C , A+C+E) ), 
                                  name="expCovDZ"),
                                
                                mxModel("MZ", mxData( observed=DataMZ2, type="raw" ),
                                        # Algebra for making the means a function of the definition variables age and sex
                                        mxMatrix( type="Full", nrow=1, ncol=2, free=F, label=c("data.age1","data.age2"), name="MZDefVars"),
                                        mxAlgebra( expression=twinACE.expMean + twinACE.beta %*% MZDefVars, name="expMeanMZ"),
                                        mxFIMLObjective( covariance="twinACE.expCovMZ", means="expMeanMZ", dimnames=selVars )	),
                                mxModel("DZ", mxData( observed=DataDZ2, type="raw" ),
                                        mxMatrix( type="Full", nrow=1, ncol=2, free=F, label=c("data.age1","data.age2"), name="DZDefVars"),
                                        mxAlgebra( expression=twinACE.expMean + twinACE.beta %*% DZDefVars, name="expMeanDZ"),
                                        mxFIMLObjective( covariance="twinACE.expCovDZ", means="expMeanDZ", dimnames=selVars )	),
                                mxAlgebra( expression=MZ.objective + DZ.objective, name="twin" ),
                                mxAlgebraObjective("twin")
        )
        #############################################################
        #ACE model set in OpenMX above
        #############################################################
        #Run ACE model and pull summary variables 
        #############################################################
        twinACEFit <- mxRun(twinACEModel)
        summary(twinACEFit)
        b_mx<-mxEval(betaAge,twinACEFit) #pull beta coefficients
        A_mx<-mxEval(a*a,twinACEFit) #pull heritability estimates
        #############################################################
        ##Pull all OpenMX results for every run and place in results file
        h2r_open=as.matrix(c(h2r_open,A_mx))
        bage_open=as.matrix(c(bage_open,b_mx))
        
        ##############################################################
        ##Make MZ and DZ Phenotype files for SOLAR (Have to have sex data for SOLAR) 		
        # simulate binary 0/1 sex variable for MZ 
        sexMZ<-cut(as.vector(runif(n=nrow(DataMZ2),0,1)), c(-Inf,0.5,Inf), labels=F)
        DataMZ3 <- cbind("mztwin"=1:nrow(DataMZ2),DataMZ2[,1:3],sexMZ)
        DataDZ3 <- cbind("dztwin"=1:nrow(DataDZ2),DataDZ2[,1:3])
        colnames(DataMZ3) <- c('mztwin','t1','t2','age','sex')
        colnames(DataDZ3) <- c('dztwin','t1','t2','age')
        DataMZ<- transform(DataMZ3)
        DataDZ<- transform(DataDZ3)
        
        #this data is in wide format which OpenMX uses (with removal or ignoring of the dz/mztwin column), we need to use reshape2 to make it into long format so that each twin has one row, as SOLAR uses
        
        DataMZ_frame <- data.frame(DataMZ3)
        phenoMZ1<-melt(DataMZ_frame,id=c("mztwin","age","sex"))
        DataDZ_frame <- data.frame(DataDZ3)
        phenoDZ1<-melt(DataDZ_frame,id=c("dztwin","age"))
        
        #sort by mztwin id which is basically famid
        attach(phenoMZ1)
        phenoMZ<-phenoMZ1[order(mztwin),]
        detach(phenoMZ1)
        attach(phenoDZ1)
        phenoDZ<-phenoDZ1[order(dztwin),]
        detach(phenoDZ1)
        
        #add an index for twin 1, twin2 (ie add in their IDs for SOLAR, they can be 1/2 because their famid and mzID will be different)
        
        phenoMZ$index<-1:2
        solarphenoMZ<-cbind(phenoMZ$index, phenoMZ$age, phenoMZ$value, phenoMZ$mztwin)
        colnames(solarphenoMZ) <- c('ID','age',paste('kindgen',iter,sep=""),'FAMID')
        phenoDZ$index<-1:2
        solarphenoDZ<-cbind(phenoDZ$index, phenoDZ$age, phenoDZ$value, max+phenoDZ$dztwin)
        colnames(solarphenoDZ) <- c('ID','age',paste('kindgen',iter,sep=""),'FAMID')
        solarphenotwins=rbind(solarphenoMZ,solarphenoDZ)
        write.table(solarphenotwins, paste(subDirmzAC,'/gen',iter,'.csv',sep=""), col.names=TRUE, row.names=FALSE, quote=FALSE, sep=",")
        
        ##############################################################
        ##############Make Pedigree file for MZ and DZ twins together###
        
        # Add simulated binary 0/1 sex variable for DZ (don't have to be same for twins, i.e. can have a male/female DZ twin set, which is why this is done later than it was done for MZ twins)
        
        sexDZ<-cut(as.vector(runif(n=nrow(phenoDZ),0,1)), c(-Inf,0.5,Inf), labels=F)
        phenoDZ2<- cbind(phenoDZ,sexDZ)
        
        # simulate DZ pedigree with parents included (neccessary for SOLAR)
        solarpedDZ<-cbind(phenoDZ2$index, as.matrix(rep(3,nrow(phenoDZ2))), as.matrix(rep(4,nrow(phenoDZ2))), phenoDZ2$sexDZ, max+phenoDZ2$dztwin,as.matrix(rep("",nrow(phenoDZ2))),phenoDZ2$dztwin+max)
        colnames(solarpedDZ) <- c('ID','fa','mo','sex','famid','mztwin','hhid')
        dzparents<-matrix(0,nrow(solarpedDZ),ncol(solarpedDZ))
        dzparents[,1]<-3:4
        dzparents[,4]<-1:2
        dzparents[,5]<-max+rep(1:nrow(solarpedDZ),each=2,length=nrow(solarpedDZ))
        dzparents[,6]<-""
        dzparents[,7]<-""
        solarpedDZ2<-rbind(solarpedDZ,dzparents)
        
        
        # simulate MZ pedigree with parents included (neccessary for SOLAR)
        solarpedMZ<-cbind(phenoMZ$index, as.matrix(rep(3,nrow(phenoMZ))), as.matrix(rep(4,nrow(phenoMZ))), phenoMZ$sex, phenoMZ$mztwin, phenoMZ$mztwin,phenoMZ$mztwin)
        colnames(solarpedMZ) <- c('ID','fa','mo','sex','famid','mztwin','hhid')
        mzparents<-matrix(0,nrow(solarpedMZ),ncol(solarpedMZ))
        mzparents[,1]<-3:4
        mzparents[,4]<-1:2
        mzparents[,5]<-rep(1:nrow(solarpedMZ),each=2,length=nrow(solarpedMZ))
        mzparents[,6]<-""
        mzparents[,7]<-""
        solarpedMZ2<-rbind(solarpedMZ,mzparents)
        solarpedtwins=rbind(solarpedMZ2,solarpedDZ2)
        write.table(solarpedtwins, paste(subDirmz,'pair',num,"/ped",num,".ped",sep=""), col.names=TRUE, row.names=FALSE, quote=FALSE, sep=",")
      }
      
      ############Write table of heritability and beta coefficient results for OpenMX##########################################
      write.table(h2r_open, H2r_open, col.names=FALSE, row.names=FALSE, quote=FALSE, sep=",")
      write.table(bage_open, Bage_open, col.names=FALSE, row.names=FALSE, quote=FALSE, sep=",")
    }
  }
}





