AWS DataSync NFS to S3 data transfer

July 19, 2021

Move data from an on-premises NFS share to AWS S3 over the public internet

Use case:

ESXi 7
Ubuntu 20.04 with NFS v4
AWS DataSync agent

Set up the NFS server
sudo apt update -y
sudo apt upgrade -y
sudo apt install nfs-kernel-server -y
mkdir $HOME/NFS
sudo nano /etc/exports
/home/mateusz/NFS 192.168.88.0/24(rw,sync,no_subtree_check,crossmnt)
sudo exportfs -ar
sudo exportfs -v
Mount NFS on the client side

Optional for testing

mkdir $HOME/NFS
sudo mount -t nfs -o vers=4 PRIVATE_IP_OF_THE_NFS:/ /home/mateusz/NFS
df -h

Filesystem       Size  Used Avail Use% Mounted on
udev              16G     0   16G   0% /dev
tmpfs            3,2G  2,5M  3,2G   1% /run
/dev/nvme0n1p7    96G   47G   45G  51% /
tmpfs             16G   66M   16G   1% /dev/shm
tmpfs            5,0M  4,0K  5,0M   1% /run/lock
tmpfs             16G     0   16G   0% /sys/fs/cgroup
/dev/loop1       100M  100M     0 100% /snap/core/11187
/dev/loop3       100M  100M     0 100% /snap/core/11316
/dev/loop2       217M  217M     0 100% /snap/code/70
/dev/loop4       219M  219M     0 100% /snap/gnome-3-34-1804/72
/dev/loop0       209M  209M     0 100% /snap/code/69
/dev/loop6        51M   51M     0 100% /snap/snap-store/547
/dev/loop7        65M   65M     0 100% /snap/gtk-common-themes/1514
/dev/loop5        66M   66M     0 100% /snap/gtk-common-themes/1515
/dev/loop8       163M  163M     0 100% /snap/gnome-3-28-1804/145
/dev/loop9       219M  219M     0 100% /snap/gnome-3-34-1804/66
/dev/loop10       56M   56M     0 100% /snap/core18/2074
/dev/loop11      124M  124M     0 100% /snap/slack/42
/dev/loop12       52M   52M     0 100% /snap/snap-store/518
/dev/loop13       33M   33M     0 100% /snap/snapd/12398
/dev/loop14      189M  189M     0 100% /snap/postman/140
/dev/loop15       56M   56M     0 100% /snap/core18/2066
/dev/loop16      129M  129M     0 100% /snap/teams/5
/dev/loop17       33M   33M     0 100% /snap/snapd/12159
/dev/loop18      165M  165M     0 100% /snap/gnome-3-28-1804/161
/dev/nvme0n1p2    95M   33M   63M  35% /boot/efi
tmpfs            3,2G   20K  3,2G   1% /run/user/125
tmpfs            3,2G  4,5M  3,2G   1% /run/user/1000
PRIVATE_IP_OF_THE_NFS:/   63G  8,3G   51G  14% /home/mateusz/NFS
Deploy AWS DataSync agent
  1. Download Agent
wget https://d8vjazrbkazun.cloudfront.net/AWS-DataSync-Agent-VMWare.zip
  2. Deploy a virtual machine from an OVF or OVA file
  3. Get activation code
curl "http://IPV4_OF_THE_DATASYNC_AGENT/?gatewayType=SYNC&activationRegion=eu-central-1&no_redirect"
  4. Create DataSync agent
aws datasync create-agent \
    --activation-key "ACTIVATION_KEY" \
    --agent-name "DataSync Agent"
{
  "AgentArn": "arn:aws:datasync:eu-central-1:ACCOUNT_NUMBER:agent/agent-0efd30bd15e5a7440"
}
  5. Create NFS source location
aws datasync create-location-nfs \
    --subdirectory "/" \
    --server-hostname "PRIVATE_IP_OF_THE_NFS" \
    --on-prem-config AgentArns=arn:aws:datasync:eu-central-1:ACCOUNT_NUMBER:agent/agent-0efd30bd15e5a7440
{
  "LocationArn": "arn:aws:datasync:eu-central-1:ACCOUNT_NUMBER:location/loc-087c99cc3f4109fd8"
}
  6. Create S3 destination location

    Create S3 bucket

aws s3api create-bucket \
--bucket esxi-on-prem-data-sync \
--create-bucket-configuration LocationConstraint=eu-central-1

Create Role & Policy

trust-policy.json

{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "Service": "datasync.amazonaws.com"
      },
      "Action": "sts:AssumeRole"
    }
  ]
}

role-policy.json

{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Action": [
        "s3:GetBucketLocation",
        "s3:ListBucket",
        "s3:ListBucketMultipartUploads"
      ],
      "Effect": "Allow",
      "Resource": "arn:aws:s3:::esxi-on-prem-data-sync"
    },
    {
      "Action": [
        "s3:AbortMultipartUpload",
        "s3:DeleteObject",
        "s3:GetObject",
        "s3:ListMultipartUploadParts",
        "s3:PutObjectTagging",
        "s3:GetObjectTagging",
        "s3:PutObject"
      ],
      "Effect": "Allow",
      "Resource": "arn:aws:s3:::esxi-on-prem-data-sync/*"
    }
  ]
}

Create role

aws iam create-role \
    --role-name esxi-on-prem-data-sync-s3-destination-role \
    --assume-role-policy-document file://trust-policy.json
{
  "Role": {
    "Path": "/",
    "RoleName": "esxi-on-prem-data-sync-s3-destination-role",
    "RoleId": "AROA2RWLLHCSR5U5NAKHL",
    "Arn": "arn:aws:iam::ACCOUNT_NUMBER:role/esxi-on-prem-data-sync-s3-destination-role",
    "CreateDate": "2021-07-19T11:15:00Z",
    "AssumeRolePolicyDocument": {
      "Version": "2012-10-17",
      "Statement": [
        {
          "Effect": "Allow",
          "Principal": {
            "Service": "datasync.amazonaws.com"
          },
          "Action": "sts:AssumeRole"
        }
      ]
    }
  }
}

Create policy

aws iam create-policy \
    --policy-name esxi-on-prem-data-sync-s3-destination-role-policy \
    --policy-document file://role-policy.json
{
  "Policy": {
    "PolicyName": "esxi-on-prem-data-sync-s3-destination-role-policy",
    "PolicyId": "ANPA2RWLLHCS7XLXKJQQX",
    "Arn": "arn:aws:iam::ACCOUNT_NUMBER:policy/esxi-on-prem-data-sync-s3-destination-role-policy",
    "Path": "/",
    "DefaultVersionId": "v1",
    "AttachmentCount": 0,
    "PermissionsBoundaryUsageCount": 0,
    "IsAttachable": true,
    "CreateDate": "2021-07-19T11:22:33Z",
    "UpdateDate": "2021-07-19T11:22:33Z"
  }
}

Attach policy to role

aws iam attach-role-policy \
    --role-name esxi-on-prem-data-sync-s3-destination-role \
    --policy-arn arn:aws:iam::ACCOUNT_NUMBER:policy/esxi-on-prem-data-sync-s3-destination-role-policy

Create destination location

aws datasync create-location-s3 \
    --s3-bucket-arn arn:aws:s3:::esxi-on-prem-data-sync \
    --s3-config BucketAccessRoleArn=arn:aws:iam::ACCOUNT_NUMBER:role/esxi-on-prem-data-sync-s3-destination-role
{
  "LocationArn": "arn:aws:datasync:eu-central-1:ACCOUNT_NUMBER:location/loc-05c7c0730d30c409d"
}
  7. Create log group
aws logs create-log-group --log-group-name "/aws/datasync/esxi-on-prem-data-sync"
aws logs describe-log-groups --log-group-name-prefix "/aws/datasync/esxi-on-prem-data-sync"
{
  "logGroups": [
    {
      "logGroupName": "/aws/datasync/esxi-on-prem-data-sync",
      "creationTime": 1626694386332,
      "metricFilterCount": 0,
      "arn": "arn:aws:logs:eu-central-1:ACCOUNT_NUMBER:log-group:/aws/datasync/esxi-on-prem-data-sync:*",
      "storedBytes": 0
    }
  ]
}
  8. Create task
aws datasync create-task \
    --source-location-arn arn:aws:datasync:eu-central-1:ACCOUNT_NUMBER:location/loc-087c99cc3f4109fd8 \
    --destination-location-arn arn:aws:datasync:eu-central-1:ACCOUNT_NUMBER:location/loc-05c7c0730d30c409d  \
    --cloud-watch-log-group-arn "arn:aws:logs:eu-central-1:ACCOUNT_NUMBER:log-group:/aws/datasync/esxi-on-prem-data-sync:*" \
    --name "NFS to S3" \
    --options LogLevel=TRANSFER
{
  "TaskArn": "arn:aws:datasync:eu-central-1:ACCOUNT_NUMBER:task/task-063c848071bcb5e12"
}
  9. Execute
aws datasync start-task-execution \
  --task-arn arn:aws:datasync:eu-central-1:ACCOUNT_NUMBER:task/task-063c848071bcb5e12
{
  "TaskExecutionArn": "arn:aws:datasync:eu-central-1:ACCOUNT_NUMBER:task/task-063c848071bcb5e12/execution/exec-08471e0b323eb5d43"
}

Image

Image

Image