blob: 6e528b97d40d567cec2f444e9c848d9def71a8c8 [file] [log] [blame]
package gce
import (
"fmt"
"io/ioutil"
"os"
"os/user"
"path"
"path/filepath"
"runtime"
"strings"
"time"
"google.golang.org/api/compute/v0.alpha"
"go.skia.org/infra/go/auth"
"go.skia.org/infra/go/exec"
"go.skia.org/infra/go/metadata"
"go.skia.org/infra/go/sklog"
"go.skia.org/infra/go/util"
)
// Constants used when describing and creating GCE disks and instances.
const (
// Full resource path of the NVIDIA Tesla K80 accelerator type. Note that the
// project and zone are baked into the path.
ACCELERATOR_TYPE_NVIDIA_TESLA_K80 = "projects/google.com:skia-buildbots/zones/us-east1-d/acceleratorTypes/nvidia-tesla-k80"
// Value for Instance.MinCpuPlatform.
CPU_PLATFORM_SKYLAKE = "Intel Skylake"
// Time layouts. Labels can only contain lowercase letters, numbers,
// underscores, and dashes. CaptureImage uses DATE_FORMAT in image names and
// DATETIME_FORMAT in image labels.
DATE_FORMAT = "2006-01-02"
DATETIME_FORMAT = "2006-01-02_15-04-05"
// Name of a disk snapshot; usable as Disk.SourceSnapshot.
DISK_SNAPSHOT_SYSTEMD_PUSHABLE_BASE = "skia-systemd-pushable-base"
// Values for Disk.Type. Local SSDs are created together with the instance
// rather than as standalone disks.
DISK_TYPE_LOCAL_SSD = "local-ssd"
DISK_TYPE_PERSISTENT_STANDARD = "pd-standard"
DISK_TYPE_PERSISTENT_SSD = "pd-ssd"
// Image status which CaptureImage waits for.
IMAGE_STATUS_READY = "READY"
// Values for Instance.MachineType.
MACHINE_TYPE_HIGHMEM_2 = "n1-highmem-2"
MACHINE_TYPE_HIGHMEM_16 = "n1-highmem-16"
MACHINE_TYPE_HIGHMEM_32 = "n1-highmem-32"
MACHINE_TYPE_STANDARD_1 = "n1-standard-1"
MACHINE_TYPE_STANDARD_2 = "n1-standard-2"
MACHINE_TYPE_STANDARD_4 = "n1-standard-4"
MACHINE_TYPE_STANDARD_8 = "n1-standard-8"
MACHINE_TYPE_STANDARD_16 = "n1-standard-16"
MACHINE_TYPE_STANDARD_32 = "n1-standard-32"
// Values for Instance.MaintenancePolicy. MIGRATE is used when the field is
// left empty.
MAINTENANCE_POLICY_MIGRATE = "MIGRATE"
MAINTENANCE_POLICY_TERMINATE = "TERMINATE"
// Network to which every created instance is attached.
NETWORK_DEFAULT = "global/networks/default"
// Values for Instance.Os.
OS_LINUX = "Linux"
OS_WINDOWS = "Windows"
// GCE project in which all resources are managed.
PROJECT_ID = "google.com:skia-buildbots"
// Values for Instance.ServiceAccount. SERVICE_ACCOUNT_DEFAULT is used when
// the field is left empty.
SERVICE_ACCOUNT_DEFAULT = "31977622648@project.gserviceaccount.com"
SERVICE_ACCOUNT_CHROME_SWARMING = "chrome-swarming-bots@skia-buildbots.google.com.iam.gserviceaccount.com"
SERVICE_ACCOUNT_CHROMIUM_SWARM = "chromium-swarm-bots@skia-buildbots.google.com.iam.gserviceaccount.com"
// Metadata keys under which the setup script is stored, per OS, and the
// path to which the setup script is downloaded on Linux instances.
SETUP_SCRIPT_KEY_LINUX = "setup-script"
SETUP_SCRIPT_KEY_WIN = "sysprep-oobe-script-ps1"
SETUP_SCRIPT_PATH_LINUX = "/tmp/setup-script.sh"
// Default user name.
USER_DEFAULT = "default"
// GCE zones, followed by aliases which select a zone by purpose.
ZONE_CENTRAL1_B = "us-central1-b"
ZONE_CENTRAL1_C = "us-central1-c"
ZONE_EAST1_D = "us-east1-d"
ZONE_CT = ZONE_CENTRAL1_B
ZONE_DEFAULT = ZONE_CENTRAL1_C
ZONE_GPU = ZONE_EAST1_D
ZONE_SKYLAKE = ZONE_CENTRAL1_B
// Disk statuses. diskStatusError is also what getDiskStatus reports when
// the disk cannot be retrieved.
diskStatusError = "ERROR"
diskStatusReady = "READY"
// Instance statuses. instanceStatusError is also what getInstanceStatus
// reports when the instance cannot be retrieved.
instanceStatusError = "ERROR"
instanceStatusRunning = "RUNNING"
instanceStatusStopped = "TERMINATED"
// Substrings searched for in the text of errors returned by the compute
// API in order to recognize "not found" and "already exists" failures.
errNotFound = "\\\"reason\\\": \\\"notFound\\\""
errAlreadyExists = "\\\"reason\\\": \\\"alreadyExists\\\""
// Timeout used for the wait loops in this file.
maxWaitTime = 10 * time.Minute
// Text searched for in the serial port output of Windows instances by
// IsInstanceReady.
winSetupFinishedText = "Instance setup finished."
winStartupFinishedText = "Finished running startup scripts."
)
var (
// VALID_OS lists the operating system names defined by this package for
// Instance.Os.
VALID_OS = []string{OS_LINUX, OS_WINDOWS}
)
// GCloud is a struct used for creating disks and instances in GCE. Use
// NewGCloud to obtain an instance.
type GCloud struct {
// GCE project ID passed to every compute API call.
project string
// Compute API client.
s *compute.Service
// Working directory; NewGCloud stores the OAuth token cache file here.
workdir string
// GCE zone passed to every zonal compute API call.
zone string
}
// NewGCloud returns a GCloud instance which manages resources of PROJECT_ID in
// the given zone. The OAuth token is cached in "gcloud_token.data" inside
// workdir. An error is returned if the authenticated client or the compute
// service cannot be created, or if the local machine is not set up for SSH
// (see sshArgs).
func NewGCloud(zone, workdir string) (*GCloud, error) {
	// workdir is a local filesystem path, so it is joined with filepath
	// rather than with the slash-only path package.
	oauthCacheFile := filepath.Join(workdir, "gcloud_token.data")
	httpClient, err := auth.NewClient(true, oauthCacheFile, compute.CloudPlatformScope, compute.ComputeScope, compute.DevstorageFullControlScope)
	if err != nil {
		return nil, err
	}

	s, err := compute.New(httpClient)
	if err != nil {
		return nil, err
	}

	// Verify that we're set up for SSH.
	if _, err := sshArgs(); err != nil {
		return nil, err
	}

	return &GCloud{
		project: PROJECT_ID,
		s:       s,
		workdir: workdir,
		zone:    zone,
	}, nil
}
// Disk is a struct describing a disk resource in GCE.
type Disk struct {
// The name of the disk. Also used as the device name when the disk is
// attached to an instance.
Name string
// Size of the disk, in gigabytes.
SizeGb int64
// Optional, image to flash to the disk. Use only one of SourceImage
// and SourceSnapshot. A value with exactly five slash-separated
// components is passed to GCE verbatim; anything else is treated as
// the name of an image in the GCloud's project.
SourceImage string
// Optional, snapshot to flash to the disk. Use only one of SourceImage
// and SourceSnapshot. A value with exactly five slash-separated
// components is passed to GCE verbatim; anything else is treated as
// the name of a snapshot in the GCloud's project.
SourceSnapshot string
// Type of disk, eg. "pd-standard" or "pd-ssd".
Type string
}
// CreateDisk inserts the given disk into the GCloud's project and zone and
// blocks until the disk reports that it is ready. At most one of
// disk.SourceImage and disk.SourceSnapshot may be set. When the disk already
// exists, an error is returned unless ignoreExists is true, in which case the
// existing disk is left untouched.
func (g *GCloud) CreateDisk(disk *Disk, ignoreExists bool) error {
	sklog.Infof("Creating disk %q", disk.Name)
	if disk.SourceImage != "" && disk.SourceSnapshot != "" {
		return fmt.Errorf("Only one of SourceImage and SourceSnapshot may be used.")
	}

	req := &compute.Disk{
		Name:   disk.Name,
		SizeGb: disk.SizeGb,
		Type:   fmt.Sprintf("zones/%s/diskTypes/%s", g.zone, disk.Type),
	}
	// A source with five slash-separated components (ie. four slashes) is
	// used as-is; anything else is resolved within our own project.
	switch {
	case disk.SourceImage != "":
		req.SourceImage = disk.SourceImage
		if strings.Count(disk.SourceImage, "/") != 4 {
			req.SourceImage = fmt.Sprintf("projects/%s/global/images/%s", g.project, disk.SourceImage)
		}
	case disk.SourceSnapshot != "":
		req.SourceSnapshot = disk.SourceSnapshot
		if strings.Count(disk.SourceSnapshot, "/") != 4 {
			req.SourceSnapshot = fmt.Sprintf("projects/%s/global/snapshots/%s", g.project, disk.SourceSnapshot)
		}
	}

	op, err := g.s.Disks.Insert(g.project, g.zone, req).Do()
	if err != nil {
		if !strings.Contains(err.Error(), errAlreadyExists) {
			return err
		}
		if !ignoreExists {
			return fmt.Errorf("Disk %q already exists.", disk.Name)
		}
		sklog.Infof("Disk %q already exists; ignoring.", disk.Name)
		return nil
	}
	if op.Error != nil {
		return fmt.Errorf("Failed to insert disk: %v", op.Error)
	}
	if err := g.waitForDisk(disk.Name, diskStatusReady, maxWaitTime); err != nil {
		return err
	}
	sklog.Infof("Successfully created disk %s", disk.Name)
	return nil
}
// DeleteDisk deletes the disk with the given name from the GCloud's project
// and zone and blocks until the disk can no longer be retrieved. When the disk
// does not exist, an error is returned unless ignoreNotExists is true.
func (g *GCloud) DeleteDisk(name string, ignoreNotExists bool) error {
	sklog.Infof("Deleting disk %q", name)
	op, err := g.s.Disks.Delete(g.project, g.zone, name).Do()
	if err != nil {
		if !strings.Contains(err.Error(), errNotFound) {
			return fmt.Errorf("Failed to delete disk %q: %s", name, err)
		}
		if !ignoreNotExists {
			// The disk is missing; report that rather than claiming
			// that it already exists.
			return fmt.Errorf("Disk %q does not exist.", name)
		}
		sklog.Infof("Disk %q does not exist; ignoring.", name)
		return nil
	}
	if op.Error != nil {
		return fmt.Errorf("Failed to delete disk: %v", op.Error)
	}
	// getDiskStatus reports diskStatusError when the disk cannot be
	// retrieved, which is how we detect that the deletion has finished.
	if err := g.waitForDisk(name, diskStatusError, maxWaitTime); err != nil {
		return err
	}
	sklog.Infof("Successfully deleted disk %s", name)
	return nil
}
// getDiskStatus looks up the named disk and returns its status. Any failure
// to retrieve the disk is reported as diskStatusError.
func (g *GCloud) getDiskStatus(name string) string {
	if d, err := g.s.Disks.Get(g.project, g.zone, name).Do(); err == nil {
		return d.Status
	}
	return diskStatusError
}
// waitForDisk polls the named disk every five seconds until it reports the
// given status. An error is returned if the status has not been reached once
// the timeout has elapsed.
func (g *GCloud) waitForDisk(name, status string, timeout time.Duration) error {
	began := time.Now()
	for {
		current := g.getDiskStatus(name)
		if current == status {
			return nil
		}
		if time.Since(began) > timeout {
			return fmt.Errorf("Exceeded timeout of %s", timeout)
		}
		sklog.Infof("Waiting for disk %q (status %s)", name, current)
		time.Sleep(5 * time.Second)
	}
}
// Instance is a struct representing a GCE VM instance.
type Instance struct {
// Information about the boot disk. Required.
BootDisk *Disk
// Information about an extra data disk. Optional.
DataDisk *Disk
// External IP address for the instance. If empty, it is filled in from
// the GCE API after the instance has been created.
ExternalIpAddress string
// Whether or not to include an NVIDIA Tesla K80 GPU on the instance.
Gpu bool
// Files to download from Google Storage. Map keys are destination paths
// on the GCE instance and values are the source URLs. Paths may be
// absolute or relative (to the default user's home dir, eg.
// /home/default).
GSDownloads map[string]string
// GCE machine type specification, eg. "n1-standard-16".
MachineType string
// Maintenance policy. Default is MAINTENANCE_POLICY_MIGRATE, which is
// not supported for preemptible instances.
MaintenancePolicy string
// Instance-level metadata keys and values.
Metadata map[string]string
// Files to create based on metadata. Map keys are destination paths on
// the GCE instance and values are the source URLs (see
// metadata.METADATA_URL). Paths may be absolute or relative (to the
// default user's home dir, eg. /home/default).
MetadataDownloads map[string]string
// Minimum CPU platform, eg. CPU_PLATFORM_SKYLAKE. Default is
// determined by GCE.
MinCpuPlatform string
// Name of the instance. Required.
Name string
// Operating system of the instance, eg. OS_LINUX. Required.
Os string
// Password is the default user's password. Only used for Windows.
Password string
// Auth scopes for the instance.
Scopes []string
// Path to a setup script for the instance, optional. Should be either
// absolute or relative to the parent GCloud instance's workdir. The
// setup script runs once after the instance is created. For Windows,
// this is assumed to be a PowerShell script and runs during sysprep.
// For Linux, the script needs to be executable via the shell (ie. use
// a shebang for Python scripts).
SetupScript string
// The service account to use for this instance. Will default to
// SERVICE_ACCOUNT_DEFAULT if unspecified.
ServiceAccount string
// Path to a startup script for the instance, optional. Should be either
// absolute or relative to the parent GCloud instance's workdir. The
// startup script runs as root every time the instance starts up. For
// Windows, this is assumed to be a PowerShell script. For Linux, the
// script needs to be executable via the shell (ie. use a shebang for
// Python scripts).
StartupScript string
// Tags for the instance.
Tags []string
// Default user name for the instance. Used as the login name for SSH
// and SCP.
User string
}
// scriptToMetadata loads the script found at the given path and stores its
// contents in the Instance's metadata under the given key, creating the
// metadata map if needed. Scripts for Windows instances are passed through
// util.ToDos first.
func scriptToMetadata(vm *Instance, key, path string) error {
	contents, err := ioutil.ReadFile(path)
	if err != nil {
		return err
	}
	text := string(contents)
	if vm.Os == OS_WINDOWS {
		text = util.ToDos(text)
	}
	if vm.Metadata == nil {
		vm.Metadata = make(map[string]string)
	}
	vm.Metadata[key] = text
	return nil
}
// setupScriptToMetadata reads the Instance's setup script and stores it in the
// Instance's metadata under the OS-specific setup script key. For non-Windows
// instances it also registers a metadata download which writes the script to
// SETUP_SCRIPT_PATH_LINUX on the instance.
func setupScriptToMetadata(vm *Instance) error {
	if vm.Os == OS_WINDOWS {
		return scriptToMetadata(vm, SETUP_SCRIPT_KEY_WIN, vm.SetupScript)
	}
	if vm.MetadataDownloads == nil {
		vm.MetadataDownloads = make(map[string]string)
	}
	vm.MetadataDownloads[SETUP_SCRIPT_PATH_LINUX] = fmt.Sprintf(metadata.METADATA_URL, "instance", SETUP_SCRIPT_KEY_LINUX)
	return scriptToMetadata(vm, SETUP_SCRIPT_KEY_LINUX, vm.SetupScript)
}
// startupScriptToMetadata reads the Instance's startup script and stores it in
// the Instance's metadata, under "windows-startup-script-ps1" for Windows
// instances and "startup-script" otherwise.
func startupScriptToMetadata(vm *Instance) error {
	if vm.Os == OS_WINDOWS {
		return scriptToMetadata(vm, "windows-startup-script-ps1", vm.StartupScript)
	}
	return scriptToMetadata(vm, "startup-script", vm.StartupScript)
}
// createInstance creates the given VM instance in the GCloud's project and
// zone and waits until it is running and ready to use. The disks referenced by
// vm are expected to exist already, except for a local SSD data disk, which is
// created along with the instance. Defaults are filled in on vm for
// MaintenancePolicy, ServiceAccount and, if empty, ExternalIpAddress; script
// contents are added to vm.Metadata. When the instance already exists, an
// error is returned unless ignoreExists is true.
func (g *GCloud) createInstance(vm *Instance, ignoreExists bool) error {
sklog.Infof("Creating instance %q", vm.Name)
if vm.Name == "" {
return fmt.Errorf("Instance name is required.")
}
if vm.Os == "" {
return fmt.Errorf("Instance OS is required.")
}
// Describe the disks to attach. The boot disk is deleted automatically
// along with the instance.
disks := []*compute.AttachedDisk{}
if vm.BootDisk != nil {
disks = append(disks, &compute.AttachedDisk{
AutoDelete: true,
Boot: true,
DeviceName: vm.BootDisk.Name,
Source: fmt.Sprintf("projects/%s/zones/%s/disks/%s", g.project, g.zone, vm.BootDisk.Name),
})
}
if vm.DataDisk != nil {
d := &compute.AttachedDisk{
DeviceName: vm.DataDisk.Name,
}
if vm.DataDisk.Type == DISK_TYPE_LOCAL_SSD {
// In this case, we didn't create the disk beforehand.
d.AutoDelete = true
d.InitializeParams = &compute.AttachedDiskInitializeParams{
DiskType: fmt.Sprintf("zones/%s/diskTypes/%s", g.zone, vm.DataDisk.Type),
}
d.Type = "SCRATCH"
} else {
d.Source = fmt.Sprintf("projects/%s/zones/%s/disks/%s", g.project, g.zone, vm.DataDisk.Name)
}
disks = append(disks, d)
}
// Pass the initial Windows credentials via metadata, but only if both
// the user name and the password were provided.
if vm.Os == OS_WINDOWS && vm.User != "" && vm.Password != "" {
if vm.Metadata == nil {
vm.Metadata = map[string]string{}
}
vm.Metadata["gce-initial-windows-user"] = vm.User
vm.Metadata["gce-initial-windows-password"] = vm.Password
}
if vm.MaintenancePolicy == "" {
vm.MaintenancePolicy = MAINTENANCE_POLICY_MIGRATE
}
if vm.SetupScript != "" {
if err := setupScriptToMetadata(vm); err != nil {
return err
}
}
if vm.ServiceAccount == "" {
vm.ServiceAccount = SERVICE_ACCOUNT_DEFAULT
}
if vm.Os == OS_WINDOWS && vm.StartupScript != "" {
// On Windows, the setup script runs automatically during
// sysprep which is before the startup script runs. On Linux
// the setup script does not run automatically, so to ensure
// that the startup script runs after the setup script, we have
// to wait to set the startup-script metadata item until after
// we have manually run the setup script. Therefore the startup
// script is only added to the metadata here for Windows.
if err := startupScriptToMetadata(vm); err != nil {
return err
}
}
// Convert the metadata map, which is complete at this point, into the
// form expected by the API.
metadata := make([]*compute.MetadataItems, 0, len(vm.Metadata))
for k, v := range vm.Metadata {
metadata = append(metadata, &compute.MetadataItems{
Key: k,
Value: v,
})
}
i := &compute.Instance{
Disks: disks,
MachineType: fmt.Sprintf("zones/%s/machineTypes/%s", g.zone, vm.MachineType),
Metadata: &compute.Metadata{
Items: metadata,
},
MinCpuPlatform: vm.MinCpuPlatform,
Name: vm.Name,
NetworkInterfaces: []*compute.NetworkInterface{
{
AccessConfigs: []*compute.AccessConfig{
{
NatIP: vm.ExternalIpAddress,
Type: "ONE_TO_ONE_NAT",
},
},
Network: NETWORK_DEFAULT,
},
},
Scheduling: &compute.Scheduling{
OnHostMaintenance: vm.MaintenancePolicy,
},
ServiceAccounts: []*compute.ServiceAccount{
{
Email: vm.ServiceAccount,
Scopes: vm.Scopes,
},
},
Tags: &compute.Tags{
Items: vm.Tags,
},
}
if vm.Gpu {
i.GuestAccelerators = []*compute.AcceleratorConfig{
&compute.AcceleratorConfig{
AcceleratorCount: 1,
AcceleratorType: ACCELERATOR_TYPE_NVIDIA_TESLA_K80,
},
}
}
op, err := g.s.Instances.Insert(g.project, g.zone, i).Do()
if err != nil {
if strings.Contains(err.Error(), errAlreadyExists) {
if ignoreExists {
sklog.Infof("Instance %q already exists; ignoring.", vm.Name)
} else {
return fmt.Errorf("Instance %q already exists.", vm.Name)
}
} else {
return err
}
} else if op.Error != nil {
return fmt.Errorf("Failed to insert instance: %v", op.Error)
} else {
if err := g.waitForInstance(vm.Name, instanceStatusRunning, maxWaitTime); err != nil {
return err
}
sklog.Infof("Successfully created instance %s", vm.Name)
}
// Obtain the instance IP address if necessary.
if vm.ExternalIpAddress == "" {
ip, err := g.GetIpAddress(vm)
if err != nil {
return err
}
vm.ExternalIpAddress = ip
}
// This point is also reached when the instance already existed and
// ignoreExists is set, so wait for readiness in either case.
if err := g.WaitForInstanceReady(vm, maxWaitTime); err != nil {
return err
}
return nil
}
// DeleteInstance deletes the named GCE VM instance from the GCloud's project
// and zone and blocks until the instance can no longer be retrieved. When the
// instance does not exist, an error is returned unless ignoreNotExists is
// true.
func (g *GCloud) DeleteInstance(name string, ignoreNotExists bool) error {
	sklog.Infof("Deleting instance %q", name)
	op, err := g.s.Instances.Delete(g.project, g.zone, name).Do()
	if err != nil {
		if !strings.Contains(err.Error(), errNotFound) {
			return fmt.Errorf("Failed to delete instance %q: %s", name, err)
		}
		if !ignoreNotExists {
			return fmt.Errorf("Instance %q does not exist.", name)
		}
		sklog.Infof("Instance %q does not exist; ignoring.", name)
		return nil
	}
	if op.Error != nil {
		return fmt.Errorf("Failed to delete instance: %v", op.Error)
	}
	// getInstanceStatus reports instanceStatusError when the instance
	// cannot be retrieved, which signals that the deletion has finished.
	if err := g.waitForInstance(name, instanceStatusError, maxWaitTime); err != nil {
		return err
	}
	sklog.Infof("Successfully deleted instance %s", name)
	return nil
}
// getInstanceStatus looks up the named instance and returns its status. Any
// failure to retrieve the instance is reported as instanceStatusError.
func (g *GCloud) getInstanceStatus(name string) string {
	if inst, err := g.s.Instances.Get(g.project, g.zone, name).Do(); err == nil {
		return inst.Status
	}
	return instanceStatusError
}
// waitForInstance polls the named instance every five seconds until it reports
// the given status. An error is returned if the status has not been reached
// once the timeout has elapsed.
func (g *GCloud) waitForInstance(name, status string, timeout time.Duration) error {
	began := time.Now()
	for {
		current := g.getInstanceStatus(name)
		if current == status {
			return nil
		}
		if time.Since(began) > timeout {
			return fmt.Errorf("Instance did not have status %q within timeout of %s", status, timeout)
		}
		sklog.Infof("Waiting for instance %q (status %s)", name, current)
		time.Sleep(5 * time.Second)
	}
}
// GetIpAddress queries GCE for the Instance and returns the NAT IP address of
// its access config. The instance must have exactly one network interface
// with exactly one access config and a non-empty address; otherwise an error
// is returned.
func (g *GCloud) GetIpAddress(vm *Instance) (string, error) {
	inst, err := g.s.Instances.Get(g.project, g.zone, vm.Name).Do()
	if err != nil {
		return "", err
	}
	if n := len(inst.NetworkInterfaces); n != 1 {
		return "", fmt.Errorf("Failed to obtain IP address: Instance has incorrect number of network interfaces: %d", n)
	}
	nic := inst.NetworkInterfaces[0]
	if n := len(nic.AccessConfigs); n != 1 {
		return "", fmt.Errorf("Failed to obtain IP address: Instance network interface has incorrect number of access configs: %d", n)
	}
	if ip := nic.AccessConfigs[0].NatIP; ip != "" {
		return ip, nil
	}
	return "", fmt.Errorf("Failed to obtain IP address: Got empty IP address.")
}
// sshArgs returns the command line options shared by the ssh and scp
// invocations in this file: quiet mode, the current user's
// ~/.ssh/google_compute_engine identity file, and disabled host key checking.
// An error is returned if the current user cannot be determined, if the key
// file does not exist, or if the key file cannot be inspected.
func sshArgs() ([]string, error) {
	usr, err := user.Current()
	if err != nil {
		return nil, err
	}
	// The key lives on the local filesystem, so build its path with
	// filepath rather than with the slash-only path package.
	keyFile := filepath.Join(usr.HomeDir, ".ssh", "google_compute_engine")
	if _, err := os.Stat(keyFile); err != nil {
		if os.IsNotExist(err) {
			return nil, fmt.Errorf("You need to create an SSH key at %s, per https://cloud.google.com/compute/docs/instances/connecting-to-instance#generatesshkeypair", keyFile)
		}
		// Any other failure (eg. permission denied) also means that
		// the key is unusable, so report it instead of ignoring it.
		return nil, fmt.Errorf("Failed to stat SSH key %s: %s", keyFile, err)
	}
	return []string{
		"-q", "-i", keyFile,
		"-o", "UserKnownHostsFile=/dev/null",
		"-o", "StrictHostKeyChecking=no",
	}, nil
}
// Ssh runs the given command on the instance via the local ssh binary, logging
// in as vm.User, and returns the command's output along with any error.
// Windows instances are not supported. If vm.ExternalIpAddress is empty it is
// looked up and stored on vm first.
func (g *GCloud) Ssh(vm *Instance, cmd ...string) (string, error) {
	if vm.Os == OS_WINDOWS {
		return "", fmt.Errorf("Cannot SSH into Windows machines (for: %v)", cmd)
	}
	if vm.ExternalIpAddress == "" {
		addr, err := g.GetIpAddress(vm)
		if err != nil {
			return "", err
		}
		vm.ExternalIpAddress = addr
	}
	opts, err := sshArgs()
	if err != nil {
		return "", err
	}
	// Full command line: ssh <options> <user>@<ip> <cmd...>
	full := append([]string{"ssh"}, opts...)
	full = append(full, fmt.Sprintf("%s@%s", vm.User, vm.ExternalIpAddress))
	full = append(full, cmd...)
	sklog.Infof("Running %s", strings.Join(full, " "))
	return exec.RunCwd(".", full...)
}
// Scp copies the local file src to the path dst on the instance via the local
// scp binary, logging in as vm.User. The src argument must be an absolute
// path. Windows instances are not supported. If vm.ExternalIpAddress is empty
// it is looked up and stored on vm first.
func (g *GCloud) Scp(vm *Instance, src, dst string) error {
	if vm.Os == OS_WINDOWS {
		return fmt.Errorf("Cannot SCP to Windows machines (for: %s)", dst)
	}
	if vm.ExternalIpAddress == "" {
		addr, err := g.GetIpAddress(vm)
		if err != nil {
			return err
		}
		vm.ExternalIpAddress = addr
	}
	if !filepath.IsAbs(src) {
		return fmt.Errorf("%q is not an absolute path.", src)
	}
	opts, err := sshArgs()
	if err != nil {
		return err
	}
	// Full command line: scp <options> <src> <user>@<ip>:<dst>
	target := fmt.Sprintf("%s@%s:%s", vm.User, vm.ExternalIpAddress, dst)
	full := append([]string{"scp"}, opts...)
	full = append(full, src, target)
	sklog.Infof("Copying %s -> %s@%s:%s", src, vm.User, vm.Name, dst)
	if _, err := exec.RunCwd(".", full...); err != nil {
		return err
	}
	return nil
}
// Reboot stops the instance, waits for it to be stopped, starts it again and
// waits until it is running and ready. The instance's external IP address is
// refreshed on vm afterwards.
func (g *GCloud) Reboot(vm *Instance) error {
	sklog.Infof("Rebooting instance %q", vm.Name)

	// Stop.
	stopOp, err := g.s.Instances.Stop(g.project, g.zone, vm.Name).Do()
	if err != nil {
		return err
	}
	if stopOp.Error != nil {
		return fmt.Errorf("Failed to stop instance: %v", stopOp.Error)
	}
	if err := g.waitForInstance(vm.Name, instanceStatusStopped, maxWaitTime); err != nil {
		return err
	}

	// Start.
	startOp, err := g.s.Instances.Start(g.project, g.zone, vm.Name).Do()
	if err != nil {
		return err
	}
	if startOp.Error != nil {
		return fmt.Errorf("Failed to start instance: %v", startOp.Error)
	}
	if err := g.waitForInstance(vm.Name, instanceStatusRunning, maxWaitTime); err != nil {
		return err
	}

	// Instance IP address may change at reboot.
	addr, err := g.GetIpAddress(vm)
	if err != nil {
		return err
	}
	vm.ExternalIpAddress = addr
	return g.WaitForInstanceReady(vm, maxWaitTime)
}
// IsInstanceReady reports whether the instance is ready for use. A Windows
// instance is ready once its serial port output contains either of the
// "finished" marker texts; failure to read the serial port output is returned
// as an error. Any other instance is ready if running "true" on it via SSH
// succeeds; an SSH failure means "not ready" and is not reported as an error.
func (g *GCloud) IsInstanceReady(vm *Instance) (bool, error) {
	if vm.Os != OS_WINDOWS {
		_, err := g.Ssh(vm, "true")
		return err == nil, nil
	}
	serial, err := g.s.Instances.GetSerialPortOutput(g.project, g.zone, vm.Name).Do()
	if err != nil {
		return false, err
	}
	finished := strings.Contains(serial.Contents, winStartupFinishedText) ||
		strings.Contains(serial.Contents, winSetupFinishedText)
	return finished, nil
}
// WaitForInstanceReady blocks until the instance is running and
// IsInstanceReady reports it as ready, checking every five seconds. The
// timeout covers both phases; an error is returned once it is exceeded or if
// the readiness check itself fails.
func (g *GCloud) WaitForInstanceReady(vm *Instance, timeout time.Duration) error {
	began := time.Now()
	if err := g.waitForInstance(vm.Name, instanceStatusRunning, timeout); err != nil {
		return err
	}
	for time.Since(began) <= timeout {
		ok, err := g.IsInstanceReady(vm)
		if err != nil {
			return err
		}
		if ok {
			return nil
		}
		sklog.Infof("Waiting for instance %q to be ready.", vm.Name)
		time.Sleep(5 * time.Second)
	}
	return fmt.Errorf("Instance was not ready within timeout of %s", timeout)
}
// DownloadFile copies src to dst by running "gsutil cp" on the instance via
// SSH.
func (g *GCloud) DownloadFile(vm *Instance, src, dst string) error {
	if _, err := g.Ssh(vm, "gsutil", "cp", src, dst); err != nil {
		return err
	}
	return nil
}
// GetFileFromMetadata writes the contents of the given metadata URL to the
// file dst on the instance by running wget, with the "Metadata-Flavor: Google"
// header, on the instance via SSH.
func (g *GCloud) GetFileFromMetadata(vm *Instance, url, dst string) error {
	if _, err := g.Ssh(vm, "wget", "--header", "'Metadata-Flavor: Google'", "--output-document", dst, url); err != nil {
		return err
	}
	return nil
}
// SafeFormatAndMount copies the format_and_mount.sh and safe_format_and_mount
// scripts, which are expected to sit next to this source file, to /tmp on the
// instance and then runs format_and_mount.sh with the name of the instance's
// data disk. An "is already mounted" failure of the script is not treated as
// an error. The instance must have a data disk.
func (g *GCloud) SafeFormatAndMount(vm *Instance) error {
	// The data disk name is passed to the script below; fail cleanly
	// instead of panicking on a nil pointer.
	if vm.DataDisk == nil {
		return fmt.Errorf("Instance %q has no data disk to format and mount.", vm.Name)
	}

	// Copy the format_and_mount.sh and safe_format_and_mount
	// scripts to the instance. runtime.Caller reports slash-separated
	// paths, hence the path package.
	_, filename, _, ok := runtime.Caller(0)
	if !ok {
		return fmt.Errorf("Failed to determine the location of the format_and_mount scripts.")
	}
	dir := path.Dir(filename)
	if err := g.Scp(vm, path.Join(dir, "format_and_mount.sh"), "/tmp/format_and_mount.sh"); err != nil {
		return err
	}
	if err := g.Scp(vm, path.Join(dir, "safe_format_and_mount"), "/tmp/safe_format_and_mount"); err != nil {
		return err
	}

	// Run format_and_mount.sh.
	if _, err := g.Ssh(vm, "/tmp/format_and_mount.sh", vm.DataDisk.Name); err != nil {
		if !strings.Contains(err.Error(), "is already mounted") {
			return err
		}
	}
	return nil
}
// SetMetadata sends the given key/value pairs to GCE as the metadata of the
// instance.
func (g *GCloud) SetMetadata(vm *Instance, md map[string]string) error {
	entries := make([]*compute.MetadataItems, 0, len(md))
	for key, value := range md {
		entries = append(entries, &compute.MetadataItems{Key: key, Value: value})
	}
	req := &compute.Metadata{Items: entries}
	op, err := g.s.Instances.SetMetadata(g.project, g.zone, vm.Name, req).Do()
	if err != nil {
		return err
	}
	if op.Error != nil {
		return fmt.Errorf("Failed to set instance metadata: %v", op.Error)
	}
	return nil
}
// CreateAndSetup creates an instance and all its disks and performs any
// additional setup steps: formatting and mounting the data disk, downloading
// the files listed in vm.GSDownloads and vm.MetadataDownloads, running the
// setup script (Linux), installing the startup script and finally rebooting
// the instance. When ignoreExists is true, disks and instances which already
// exist are reused instead of causing an error. Data disks are not supported
// on Windows.
func (g *GCloud) CreateAndSetup(vm *Instance, ignoreExists bool) error {
// Create the disks and the instance.
if vm.BootDisk != nil {
if err := g.CreateDisk(vm.BootDisk, ignoreExists); err != nil {
return err
}
}
if vm.DataDisk != nil {
if vm.Os == OS_WINDOWS {
return fmt.Errorf("Data disks are not currently supported on Windows.")
}
// Local SSDs are created with the instance.
if vm.DataDisk.Type != DISK_TYPE_LOCAL_SSD {
if err := g.CreateDisk(vm.DataDisk, ignoreExists); err != nil {
return err
}
}
}
if err := g.createInstance(vm, ignoreExists); err != nil {
return err
}
if vm.Os == OS_WINDOWS {
// Set the metadata on the instance again, due to a bug
// which is lost to time.
if err := g.SetMetadata(vm, vm.Metadata); err != nil {
return err
}
} else {
// There is a setup process which takes place after instance
// creation. It holds the dpkg lock and it reboots the instance
// when finished, so we need to wait for it to complete before
// performing our own setup.
sklog.Infof("Waiting for setup on %s to complete.", vm.Name)
// The SSH session is expected to be cut short by the reboot, so
// an error here is interpreted as the setup having finished,
// while a sleep which runs to completion means that no reboot
// occurred within five minutes.
if _, err := g.Ssh(vm, "sleep", "300"); err != nil {
sklog.Infof("Setup finished on %s", vm.Name)
} else {
sklog.Infof("Setup did not finish on %s within 5 minutes. Continuing anyway.", vm.Name)
}
// Instance IP address may change at reboot.
ip, err := g.GetIpAddress(vm)
if err != nil {
return err
}
vm.ExternalIpAddress = ip
if err := g.WaitForInstanceReady(vm, maxWaitTime); err != nil {
return err
}
}
// Format and mount.
if vm.DataDisk != nil {
if err := g.SafeFormatAndMount(vm); err != nil {
return err
}
}
// GSutil downloads.
for dst, src := range vm.GSDownloads {
if err := g.DownloadFile(vm, src, dst); err != nil {
return err
}
}
// Metadata downloads. On Linux these include the setup script, if any,
// which must therefore be downloaded before it is run below.
for dst, src := range vm.MetadataDownloads {
if err := g.GetFileFromMetadata(vm, src, dst); err != nil {
return err
}
}
// On Windows, the setup script runs automatically during sysprep. On
// Linux, we have to run the setup script manually. In order to ensure
// that the setup script runs before the startup script, we delay
// setting the startup-script in metadata until after we've run the
// setup script.
if vm.Os != OS_WINDOWS {
if vm.SetupScript != "" {
// The arguments are joined into a single command line for the
// remote shell, so "&&" chains the chmod and the script run.
if _, err := g.Ssh(vm, "sudo", "chmod", "+x", SETUP_SCRIPT_PATH_LINUX, "&&", SETUP_SCRIPT_PATH_LINUX); err != nil {
return err
}
}
if vm.StartupScript != "" {
if err := startupScriptToMetadata(vm); err != nil {
return err
}
if err := g.SetMetadata(vm, vm.Metadata); err != nil {
return err
}
}
}
// Reboot the instance. On Windows, this will cause the startup script to run.
// Note that the reboot is performed regardless of the OS.
if err := g.Reboot(vm); err != nil {
return err
}
return nil
}
// Delete removes the instance and, if deleteDataDisk is true, its persistent
// data disk. The boot disk and any local SSD data disk are auto-deleted along
// with the instance. When the instance or the data disk does not exist, an
// error is returned unless ignoreNotExists is true.
func (g *GCloud) Delete(vm *Instance, ignoreNotExists, deleteDataDisk bool) error {
	// Delete the instance. The boot disk will be auto-deleted. Honor the
	// caller's ignoreNotExists instead of unconditionally ignoring missing
	// resources.
	if err := g.DeleteInstance(vm.Name, ignoreNotExists); err != nil {
		return err
	}
	// Only delete the data disk(s) if explicitly told to do so.
	// Local SSDs are auto-deleted with the instance.
	if deleteDataDisk && vm.DataDisk != nil && vm.DataDisk.Type != DISK_TYPE_LOCAL_SSD {
		if err := g.DeleteDisk(vm.DataDisk.Name, ignoreNotExists); err != nil {
			return err
		}
	}
	return nil
}
// GetImages lists every image in the GCloud's project, following page tokens
// until the listing is exhausted.
func (g *GCloud) GetImages() ([]*compute.Image, error) {
	all := make([]*compute.Image, 0)
	for token := ""; ; {
		resp, err := g.s.Images.List(g.project).PageToken(token).Do()
		if err != nil {
			return nil, fmt.Errorf("Failed to load the list of images: %s", err)
		}
		all = append(all, resp.Items...)
		token = resp.NextPageToken
		if token == "" {
			return all, nil
		}
	}
}
// CaptureImage captures an image from the instance's boot disk. The instance
// has to be deleted in order to capture the image, and we delete the boot disk
// after capture for cleanliness.
//
// The image is named "<family>-v<date>-<NNN>", where NNN is the number of
// existing images whose names share the "<family>-v<date>" prefix, and is
// labeled with its creator and creation time. Both wait loops give up after
// maxWaitTime. The instance must have a boot disk.
func (g *GCloud) CaptureImage(vm *Instance, family, description string) error {
	// The boot disk is dereferenced throughout; fail cleanly instead of
	// panicking on a nil pointer.
	if vm.BootDisk == nil {
		return fmt.Errorf("Instance %q has no boot disk from which to capture an image.", vm.Name)
	}

	// Create an image name based on the family, current date, and number of
	// images created today.
	images, err := g.GetImages()
	if err != nil {
		return err
	}
	now := time.Now().UTC()
	imageName := fmt.Sprintf("%s-v%s", family, now.Format(DATE_FORMAT))
	suffix := 0
	for _, image := range images {
		if strings.HasPrefix(image.Name, imageName) {
			suffix++
		}
	}
	imageName = fmt.Sprintf("%s-%03d", imageName, suffix)
	sklog.Infof("About to capture image %q", imageName)

	// Set auto-delete to off for the boot disk.
	sklog.Infof("Turning off auto-delete for %q", vm.BootDisk.Name)
	op, err := g.s.Instances.SetDiskAutoDelete(g.project, g.zone, vm.Name, false, vm.BootDisk.Name).Do()
	if err != nil {
		return fmt.Errorf("Failed to set auto-delete on disk %q: %s", vm.BootDisk.Name, err)
	} else if op.Error != nil {
		return fmt.Errorf("Failed to set auto-delete on disk %q: %s", vm.BootDisk.Name, op.Error)
	}
	// The part of the operation's user before the "@" is recorded in the
	// image's "created-by" label.
	creator := strings.Split(op.User, "@")[0]

	// Spin until auto-delete is actually off for the instance.
	started := time.Now()
	for {
		time.Sleep(5 * time.Second)
		inst, err := g.s.Instances.Get(g.project, g.zone, vm.Name).Do()
		if err != nil {
			return fmt.Errorf("Failed to retrieve instance details: %s", err)
		}
		var d *compute.AttachedDisk
		for _, disk := range inst.Disks {
			if disk.Boot {
				d = disk
				break
			}
		}
		if d == nil {
			return fmt.Errorf("Unable to find the boot disk!")
		}
		if !d.AutoDelete {
			break
		}
		if time.Since(started) > maxWaitTime {
			return fmt.Errorf("Auto-delete was not unset on %q within the acceptable time period.", vm.BootDisk.Name)
		}
		sklog.Infof("Waiting for auto-delete to be off for %q", vm.BootDisk.Name)
	}

	// Delete the instance.
	if err := g.DeleteInstance(vm.Name, true); err != nil {
		return err
	}

	// Capture the image.
	sklog.Infof("Capturing disk image.")
	op, err = g.s.Images.Insert(g.project, &compute.Image{
		Description: description,
		Family:      family,
		Labels: map[string]string{
			"created-by": creator,
			"created-on": now.Format(DATETIME_FORMAT),
		},
		Name:       imageName,
		SourceDisk: fmt.Sprintf("projects/%s/zones/%s/disks/%s", g.project, g.zone, vm.BootDisk.Name),
	}).Do()
	if err != nil {
		return fmt.Errorf("Failed to capture image of %q: %s", vm.BootDisk.Name, err)
	} else if op.Error != nil {
		return fmt.Errorf("Failed to capture image of %q: %s", vm.BootDisk.Name, op.Error)
	}

	// Wait for the image capture to complete.
	started = time.Now()
	for {
		time.Sleep(5 * time.Second)
		images, err := g.GetImages()
		if err != nil {
			return err
		}
		found := false
		for _, img := range images {
			if img.Name == imageName && img.Status == IMAGE_STATUS_READY {
				found = true
				break
			}
		}
		if found {
			break
		}
		// Without this check an image which never becomes ready would
		// keep us waiting forever.
		if time.Since(started) > maxWaitTime {
			return fmt.Errorf("Image %q was not ready within the acceptable time period.", imageName)
		}
		sklog.Infof("Waiting for image capture to finish.")
	}

	// Delete the boot disk.
	sklog.Infof("Deleting disk %q", vm.BootDisk.Name)
	op, err = g.s.Disks.Delete(g.project, g.zone, vm.BootDisk.Name).Do()
	if err != nil {
		return fmt.Errorf("Failed to delete disk %q: %s", vm.BootDisk.Name, err)
	} else if op.Error != nil {
		return fmt.Errorf("Failed to delete disk %q: %s", vm.BootDisk.Name, op.Error)
	}
	sklog.Infof("Successfully captured image %q", imageName)
	return nil
}