# NITRC-CE AWS environment manager v1.2
#
# This Vagrantfile can be used to run processing pipelines on AWS EC2 instances.
#
# Quick Start
# -----------
# 1) Download Vagrant and install it in a directory on your PATH (e.g. /usr/bin):
#      https://www.vagrantup.com/downloads.html
# 2) Install the vagrant AWS plugin: vagrant plugin install vagrant-aws
# 3) Edit the configuration parameters below for your AWS account
# 4) Start your EC2 instance: vagrant up
#
# Useful vagrant commands
# -----------------------
# To SSH into your EC2 instance: vagrant ssh
# To get the status of your EC2 instance: vagrant status
# To stop and delete your EC2 instance: vagrant destroy
# To discover the hostname of your EC2 instance: vagrant ssh-config
#
# Running a pipeline with the option to automatically shut down the EC2 instance
# -----------------------------------------------------------------------------
# You can submit a pipeline shell script that reads input data, processes it,
# and writes the results to a directory on the EC2 instance or to an AWS S3
# bucket. When setting up your pipeline shell script, use the following
# directories for input and output:
#
# Your shell script will be uploaded to the $HOME directory on the EC2 instance.
# $HOME/input - Directory to upload data files
# $HOME/s3input - Directory to mount an S3 bucket with input data
# $HOME/output - Directory to output results data (It can be configured to
#   save to an output S3 bucket when the shell script finishes.)
#
# You can monitor pipeline progress from your browser by logging into the
# NITRC-CE dashboard on your EC2 instance, served at the instance's IP
# address: https://your-ec2-instance-ip-address
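#
# For example, a minimal pipeline script might look like the following sketch
# ("my_tool" is a hypothetical command standing in for your processing step):
#
#   #!/bin/bash
#   set -e
#   for f in $HOME/input/*; do
#     my_tool "$f" -o "$HOME/output/$(basename "$f").result"
#   done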
#
###############################################################################

# Configuration Settings

# Available AMI IDs: https://www.nitrc.org/plugins/mwiki/index.php/nitrc:NITRC-CE_AMI_List
# Automatically running a PIPELINE_SCRIPT requires NITRC-CE v0.48.2 or greater.
@AWS_NITRC_CE_AMI = 'NITRC_CE_AMI_ID_HERE'
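# e.g. @AWS_NITRC_CE_AMI = 'ami-0123456789abcdef0' (a made-up ID; pick a real
# one from the AMI list above)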

@AWS_ACCESS_KEY_ID = 'YOUR_AWS_ACCESS_KEY_ID_HERE'
@AWS_SECRET_ACCESS_KEY = 'YOUR_AWS_SECRET_ACCESS_KEY_HERE'
@AWS_REGION = 'us-east-1'
@AWS_INSTANCE_TYPE = 't2.medium'
@AWS_SECURITY_GROUP = 'YOUR_SECURITY_GROUP_ID_HERE'
@AWS_SUBNET_ID = 'YOUR_VPC_SUBNET_ID_HERE'
@AWS_KEYPAIR_NAME = 'YOUR_AWS_KEYPAIR_NAME_HERE'
@AWS_PRIVATE_KEY_PATH = '~/.ssh/id_rsa'
@AWS_INSTANCE_TAG = 'YOUR_AWS_INSTANCE_TAG_HERE' # Appears as display name in EC2 dashboard
@AWS_DISK_SIZE = 100 # Disk size in GB. Must be >= 100.

# Filename or URL of a shell script that runs the processing.
# The script will be loaded into the $HOME directory of the EC2 instance.
# It will have access to the following preset directories:
#   $HOME/input (input data)
#   $HOME/s3input (input data located in an S3 bucket)
#   $HOME/output (directory where pipeline output should be saved)
@PIPELINE_SCRIPT = ''
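# e.g. @PIPELINE_SCRIPT = 'my_pipeline.sh' for a local file, or
# @PIPELINE_SCRIPT = 'https://example.com/my_pipeline.sh' for a URL
# (both values are hypothetical placeholders)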

# Local directory to copy up to the $HOME/input directory on NITRC-CE.
@UPLOAD_INPUT_DIRECTORY = ''
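# e.g. @UPLOAD_INPUT_DIRECTORY = './my_input_data' (a hypothetical local path)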

# S3 bucket to mount in the $HOME/s3input directory (OPTIONAL)
# Use the full bucket path, including the ":". The root of a bucket is
# designated with "/". For example, to mount the publicly available
# OpenFMRI dataset, the S3 endpoint to enter below is "openfmri:/".
@INPUT_S3_BUCKET = ''

# AWS access and secret keys for the input S3 bucket (not needed for public buckets)
@INPUT_S3_AWS_ACCESS_KEY_ID = ''
@INPUT_S3_AWS_SECRET_ACCESS_KEY = ''

# S3 bucket to which the contents of $HOME/output are automatically copied
# after the pipeline completes successfully. Required if you automatically
# terminate the EC2 instance on completion.
# Use the full bucket path, including the ":". The root of a bucket is
# designated with "/". (e.g. "mybucket:/")
@OUTPUT_S3_BUCKET = ''

# AWS access and secret keys for the output S3 bucket (not needed for public buckets)
@OUTPUT_S3_AWS_ACCESS_KEY_ID = ''
@OUTPUT_S3_AWS_SECRET_ACCESS_KEY = ''

# Terminate the EC2 instance when the pipeline exits. (valid values: true or false)
# If true, be sure to set @OUTPUT_S3_BUCKET above: the contents of the
# $HOME/output directory are copied there before the EC2 instance terminates.
# Otherwise your output files will be lost.
@TERMINATE_EC2_INSTANCE_ON_EXIT = false
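# e.g. for an unattended run that keeps its results (bucket name is a placeholder):
#   @OUTPUT_S3_BUCKET = 'mybucket:/'
#   @TERMINATE_EC2_INSTANCE_ON_EXIT = true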


# End of Configuration (do not edit below this line) ##########################

Vagrant.configure("2") do |config|

  # Disable default synced folder
  config.vm.synced_folder ".", "/vagrant", disabled: true

  require 'vagrant-aws'
  config.vm.box = "NITRC/aws"
  config.vm.box_version = "1.0.0"

  config.vm.provider "aws" do |aws, override|
    aws.access_key_id = @AWS_ACCESS_KEY_ID
    aws.secret_access_key = @AWS_SECRET_ACCESS_KEY
    aws.keypair_name = @AWS_KEYPAIR_NAME
    aws.instance_type = @AWS_INSTANCE_TYPE
    aws.region = @AWS_REGION
    aws.ami = @AWS_NITRC_CE_AMI
    aws.security_groups = [@AWS_SECURITY_GROUP]
    if !@AWS_SUBNET_ID.empty?
      aws.subnet_id = @AWS_SUBNET_ID
    end
    override.ssh.username = 'ubuntu'
    override.ssh.private_key_path = @AWS_PRIVATE_KEY_PATH
    aws.tags = {
      'Name' => @AWS_INSTANCE_TAG
    }
    aws.block_device_mapping = [
      {
        'DeviceName' => '/dev/sda1',
        'Ebs.VolumeSize' => @AWS_DISK_SIZE
      }
    ]
  end

  if !@UPLOAD_INPUT_DIRECTORY.empty?
    config.vm.provision "shell",
      name: 'Create $HOME/input directory',
      privileged: false,
      inline: <<-SHELL
        mkdir -p $HOME/input
      SHELL
    config.vm.provision "file",
      source: @UPLOAD_INPUT_DIRECTORY,
      destination: "$HOME/input"
  end

  if !@INPUT_S3_BUCKET.empty?
    config.vm.provision "shell",
      name: 'Mount input S3 bucket at $HOME/s3input',
      privileged: false,
      inline: <<-SHELL
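        # NOTE: the access keys below are filled in from the configuration at
        # the top of this Vagrantfile before this script runs on the instance.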
        source $HOME/.profile
        export AWS_ACCESS_KEY_ID=#{@INPUT_S3_AWS_ACCESS_KEY_ID}
        export AWS_SECRET_ACCESS_KEY=#{@INPUT_S3_AWS_SECRET_ACCESS_KEY}
        mkdir -p $HOME/s3/input
        nmonitor mount --bucket #{@INPUT_S3_BUCKET} --mountpoint input \
          --access_method ro
        ln -s $HOME/s3/input $HOME/s3input
      SHELL
  end

  if !@OUTPUT_S3_BUCKET.empty?
    config.vm.provision "shell",
      name: 'Mount output S3 bucket at $HOME/s3/output',
      privileged: false,
      inline: <<-SHELL
        echo "#!/bin/bash
          cp -r $HOME/output/* $HOME/s3/output" > $HOME/bin/pipeline_copy_out.sh
        chmod 755 $HOME/bin/pipeline_copy_out.sh
        source $HOME/.profile
        export AWS_ACCESS_KEY_ID=#{@OUTPUT_S3_AWS_ACCESS_KEY_ID}
        export AWS_SECRET_ACCESS_KEY=#{@OUTPUT_S3_AWS_SECRET_ACCESS_KEY}
        mkdir -p $HOME/s3/output
        nmonitor mount --bucket #{@OUTPUT_S3_BUCKET} --mountpoint output \
          --access_method rw
      SHELL
  end

  if @TERMINATE_EC2_INSTANCE_ON_EXIT
    config.vm.provision "shell",
      name: 'Create EC2 termination script',
      privileged: false,
      inline: <<-SHELL
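        # The generated script terminates this instance via the AWS CLI. The
        # credentials and the instance ID (fetched from the EC2 metadata
        # service) are expanded now, at provisioning time, and baked into it.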
        echo "#!/bin/bash
          export AWS_ACCESS_KEY_ID=#{@AWS_ACCESS_KEY_ID}
          export AWS_SECRET_ACCESS_KEY=#{@AWS_SECRET_ACCESS_KEY}
          export AWS_DEFAULT_REGION=#{@AWS_REGION}
          /usr/local/bin/aws ec2 terminate-instances --instance-ids \
            $(curl -s http://169.254.169.254/latest/meta-data/instance-id)
        "  > $HOME/bin/pipeline_terminate.sh
        chmod 700 $HOME/bin/pipeline_terminate.sh
      SHELL
  end

  if !@PIPELINE_SCRIPT.empty?
    config.vm.provision "file",
      source: @PIPELINE_SCRIPT,
      destination: "$HOME/pipeline.sh"
    config.vm.provision "shell",
      name: 'Start pipeline script',
      privileged: false,
      inline: <<-SHELL
        mkdir -p $HOME/output
        echo "#!/bin/bash
          /usr/local/bin/supervisorctl tail pipeline stderr > $HOME/output/pipeline_tail_error.log
          /usr/local/bin/supervisorctl tail -1600 pipeline > $HOME/output/pipeline_tail_stdout.log
          cp $HOME/.nitrcce/api/pipeline_listener.log $HOME/output/pipeline_listener.log
        "  > $HOME/bin/pipeline_logs.sh
        chmod 755 $HOME/bin/pipeline_logs.sh
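        # Wait for supervisord to accept connections (up to 40 x 15 s = 10
        # minutes) before registering and starting the pipeline.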
        supervisord_ready=true
        loop_count=0
        max_count=40
        while [[ $(/usr/local/bin/supervisorctl status) == *"refused connection"* ]]
        do
          echo "Waiting for supervisord process to start"
          sleep 15
          loop_count=$((loop_count+1))
          if [ "$loop_count" -eq "$max_count" ]; then
              echo "Supervisord process taking too long to start."
              echo "Exiting without running pipeline."
              supervisord_ready=false
              break
          fi
        done
        if [ "$supervisord_ready" = true ]; then
          echo "Supervisord process has started"
          echo "Starting pipeline"
          chmod 755 $HOME/pipeline.sh
          nmonitor create --name pipeline --command $HOME/pipeline.sh > /dev/null 2>&1
          nmonitor start --name pipeline
          echo "----------------------------------------------------------------------"
          echo "The pipeline script has been started."
          if [ -e $HOME/bin/pipeline_terminate.sh ]; then
            echo "Your machine will terminate automatically once the pipeline has completed."
            echo "Until it terminates, you can log into the EC2 instance"
          else
            echo "To monitor long running pipelines, you can log into the EC2 instance"
          fi
          echo "by running 'vagrant ssh' at the command line. Once logged in,"
          echo "type 'nmonitor list' at the command line to check the pipeline status."
          echo "You can also check on the status of your pipeline script by"
          echo "logging into the web dashboard running on your EC2 instance."
          echo "The web dashboard is located at http://$(curl -s http://169.254.169.254/latest/meta-data/public-hostname)"
        fi
      SHELL
  end

end
