[tmm] Add support for talking to ChromeOS over SSH.
This ports the logic from https://chrome-internal.googlesource.com/infradata/config/+/1f7926b4420596666f4404092cd7c05b7b3a727c/configs/chromium-swarm/scripts/skia_mobile.py#619
Change-Id: I207924a83c86be53ed4c747688678b39c5b452ad
BUG: skia:12401
Reviewed-on: https://skia-review.googlesource.com/c/buildbot/+/447765
Commit-Queue: Kevin Lubick <kjlubick@google.com>
Reviewed-by: Joe Gregorio <jcgregorio@google.com>
diff --git a/machine/go/test_machine_monitor/machine/BUILD.bazel b/machine/go/test_machine_monitor/machine/BUILD.bazel
index c54975b..58e4acd 100644
--- a/machine/go/test_machine_monitor/machine/BUILD.bazel
+++ b/machine/go/test_machine_monitor/machine/BUILD.bazel
@@ -17,6 +17,7 @@
"//machine/go/machine/store",
"//machine/go/machineserver/config",
"//machine/go/test_machine_monitor/adb",
+ "//machine/go/test_machine_monitor/ssh",
"//machine/go/test_machine_monitor/swarming",
],
)
@@ -38,6 +39,7 @@
"//machine/go/machine/source/pubsubsource",
"//machine/go/machineserver/config",
"//machine/go/test_machine_monitor/adb",
+ "//machine/go/test_machine_monitor/ssh",
"//machine/go/test_machine_monitor/swarming",
"@com_github_stretchr_testify//assert",
"@com_github_stretchr_testify//require",
diff --git a/machine/go/test_machine_monitor/machine/machine.go b/machine/go/test_machine_monitor/machine/machine.go
index c90bfd6..19c513b 100644
--- a/machine/go/test_machine_monitor/machine/machine.go
+++ b/machine/go/test_machine_monitor/machine/machine.go
@@ -4,6 +4,9 @@
import (
"context"
"os"
+ "regexp"
+ "strconv"
+ "strings"
"sync"
"time"
@@ -17,6 +20,7 @@
"go.skia.org/infra/machine/go/machine/store"
"go.skia.org/infra/machine/go/machineserver/config"
"go.skia.org/infra/machine/go/test_machine_monitor/adb"
+ "go.skia.org/infra/machine/go/test_machine_monitor/ssh"
"go.skia.org/infra/machine/go/test_machine_monitor/swarming"
)
@@ -35,6 +39,9 @@
// adb makes calls to the adb server.
adb adb.Adb
+ // ssh runs commands on a remote device over SSH.
+ ssh ssh.SSH
+
// MachineID is the swarming id of the machine.
MachineID string
@@ -239,7 +246,7 @@
func (m *Machine) tryInterrogatingAndroidDevice(ctx context.Context) (machine.Android, bool) {
var ret machine.Android
if uptime, err := m.adb.Uptime(ctx); err != nil {
- sklog.Warningf("Failed to read uptime - assuming there is no device attached: %s", err)
+ sklog.Warningf("Failed to read uptime - assuming there is no Android device attached: %s", err)
return ret, false // Assume there is no Android device attached.
} else {
ret.Uptime = uptime
@@ -270,7 +277,46 @@
return machine.IOS{}, false
}
-func (m *Machine) tryInterrogatingChromeOSDevice(_ context.Context) (machine.ChromeOS, bool) {
- // TODO(kjlubick)
- return machine.ChromeOS{}, false
+var (
+ chromeOSReleaseRegex = regexp.MustCompile(`CHROMEOS_RELEASE_VERSION=(\S+)`)
+ chromeOSMilestoneRegex = regexp.MustCompile(`CHROMEOS_RELEASE_CHROME_MILESTONE=(\S+)`)
+ chromeOSTrackRegex = regexp.MustCompile(`CHROMEOS_RELEASE_TRACK=(\S+)`)
+)
+
+func (m *Machine) tryInterrogatingChromeOSDevice(ctx context.Context) (machine.ChromeOS, bool) {
+ if m.description.SSHUserIP == "" {
+ return machine.ChromeOS{}, false
+ }
+ lsbReleaseContents, err := m.ssh.Run(ctx, m.description.SSHUserIP, "cat", "/etc/lsb-release")
+ if err != nil {
+ sklog.Warningf("Failed to read lsb-release - assuming there is no ChromeOS device attached: %s", err)
+ return machine.ChromeOS{}, false
+ }
+ rv := machine.ChromeOS{}
+ if match := chromeOSReleaseRegex.FindStringSubmatch(lsbReleaseContents); match != nil {
+ rv.ReleaseVersion = match[1]
+ }
+ if match := chromeOSMilestoneRegex.FindStringSubmatch(lsbReleaseContents); match != nil {
+ rv.Milestone = match[1]
+ }
+ if match := chromeOSTrackRegex.FindStringSubmatch(lsbReleaseContents); match != nil {
+ rv.Channel = match[1]
+ }
+ if rv.ReleaseVersion == "" && rv.Milestone == "" && rv.Channel == "" {
+ sklog.Errorf("Could not find ChromeOS data in /etc/lsb-release. Are we sure this is the right IP?\n%s", lsbReleaseContents)
+ return machine.ChromeOS{}, false
+ }
+
+ uptime, err := m.ssh.Run(ctx, m.description.SSHUserIP, "cat", "/proc/uptime")
+ if err != nil {
+ sklog.Warningf("Could not read ChromeOS uptime %s", err)
+ } else {
+ u := strings.Split(uptime, " ")[0]
+ if f, err := strconv.ParseFloat(u, 64); err != nil {
+ sklog.Warningf("Invalid /proc/uptime format: %q", uptime)
+ } else {
+ rv.Uptime = time.Duration(f * float64(time.Second))
+ }
+ }
+ return rv, true
}
diff --git a/machine/go/test_machine_monitor/machine/machine_test.go b/machine/go/test_machine_monitor/machine/machine_test.go
index ee4aaf1..31fe509 100644
--- a/machine/go/test_machine_monitor/machine/machine_test.go
+++ b/machine/go/test_machine_monitor/machine/machine_test.go
@@ -14,6 +14,7 @@
"go.skia.org/infra/go/testutils/unittest"
"go.skia.org/infra/machine/go/machine"
"go.skia.org/infra/machine/go/test_machine_monitor/adb"
+ "go.skia.org/infra/machine/go/test_machine_monitor/ssh"
)
const (
@@ -23,12 +24,35 @@
dumpSysThermalPlaceholder = "Placeholder dumpsys thermal response"
// This formatting matters because it is processed in adb.go
adbUptimePlaceholder = "123.4 567.8"
+
+ testUserIP = "root@skia-foobar-01"
+ // This example was taken directly from a production ChromeOS device
+ sampleChromeOSlsbrelease = `CHROMEOS_DEVSERVER=
+CHROMEOS_RELEASE_APPID={9A3BE5D2-C3DC-4AE6-9943-E2C113895DC5}
+CHROMEOS_RELEASE_BOARD=octopus-signed-mp-v23keys
+CHROMEOS_RELEASE_BRANCH_NUMBER=56
+CHROMEOS_RELEASE_BUILDER_PATH=octopus-release/R89-13729.56.0
+CHROMEOS_RELEASE_BUILD_NUMBER=13729
+CHROMEOS_RELEASE_BUILD_TYPE=Official Build
+CHROMEOS_RELEASE_CHROME_MILESTONE=89
+CHROMEOS_RELEASE_DESCRIPTION=13729.56.0 (Official Build) stable-channel octopus
+CHROMEOS_RELEASE_KEYSET=mp-v23
+CHROMEOS_RELEASE_NAME=Chrome OS
+CHROMEOS_RELEASE_PATCH_NUMBER=0
+CHROMEOS_RELEASE_TRACK=stable-channel
+CHROMEOS_RELEASE_UNIBUILD=1
+CHROMEOS_RELEASE_VERSION=13729.56.0
+DEVICETYPE=CHROMEBOOK
+GOOGLE_RELEASE=13729.56.0`
+
+ // Placeholder in /proc/uptime format; only the first field (uptime in seconds) is parsed.
+ sampleChromeOSUptime = "1234.5 5678.9"
)
func TestTryInterrogatingAndroidDevice_DeviceAttached_Success(t *testing.T) {
unittest.SmallTest(t)
ctx := executil.FakeTestsContext(
- "Test_FakeExe_Uptime_ReturnsPlaceholder",
+ "Test_FakeExe_ADBUptime_ReturnsPlaceholder",
"Test_FakeExe_AdbShellGetProp_ReturnsPlaceholder",
"Test_FakeExe_RawDumpSysBattery_ReturnsPlaceholder",
"Test_FakeExe_RawDumpSysThermal_ReturnsPlaceholder",
@@ -59,7 +83,7 @@
func TestTryInterrogatingAndroidDevice_ThermalFails_PartialSuccess(t *testing.T) {
unittest.SmallTest(t)
ctx := executil.FakeTestsContext(
- "Test_FakeExe_Uptime_ReturnsPlaceholder",
+ "Test_FakeExe_ADBUptime_ReturnsPlaceholder",
"Test_FakeExe_AdbShellGetProp_ReturnsPlaceholder",
"Test_FakeExe_RawDumpSysBattery_ReturnsPlaceholder",
"Test_FakeExe_ExitCodeOne",
@@ -75,10 +99,77 @@
}, actual)
}
+func TestTryInterrogatingChromeOS_DeviceReachable_Success(t *testing.T) {
+ unittest.SmallTest(t)
+ ctx := executil.FakeTestsContext(
+ "Test_FakeExe_SSHLSBRelease_ReturnsPlaceholder",
+ "Test_FakeExe_SSHUptime_ReturnsPlaceholder",
+ )
+
+ m := &Machine{ssh: ssh.ExeImpl{}, description: machine.Description{SSHUserIP: testUserIP}}
+ actual, ok := m.tryInterrogatingChromeOSDevice(ctx)
+ assert.True(t, ok)
+ assert.Equal(t, machine.ChromeOS{
+ Channel: "stable-channel",
+ Milestone: "89",
+ ReleaseVersion: "13729.56.0",
+ Uptime: 1234500 * time.Millisecond,
+ }, actual)
+}
+
+func TestTryInterrogatingChromeOS_CatLSBReleaseFails_DeviceConsideredUnattached(t *testing.T) {
+ unittest.SmallTest(t)
+ ctx := executil.FakeTestsContext(
+ "Test_FakeExe_ExitCodeOne",
+ )
+
+ m := &Machine{ssh: ssh.ExeImpl{}, description: machine.Description{SSHUserIP: testUserIP}}
+ _, ok := m.tryInterrogatingChromeOSDevice(ctx)
+ assert.False(t, ok)
+}
+
+func TestTryInterrogatingChromeOS_NoSSHUserIP_ReturnFalse(t *testing.T) {
+ unittest.SmallTest(t)
+ ctx := executil.FakeTestsContext() // Any exe call will panic
+
+ m := &Machine{ssh: ssh.ExeImpl{}}
+ _, ok := m.tryInterrogatingChromeOSDevice(ctx)
+ assert.False(t, ok)
+}
+
+func TestTryInterrogatingChromeOS_PartialData_PartialSuccess(t *testing.T) {
+ unittest.SmallTest(t)
+ ctx := executil.FakeTestsContext(
+ "Test_FakeExe_SSHLSBRelease_ReturnsPlaceholder",
+ "Test_FakeExe_ExitCodeOne", // pretend uptime fails
+ )
+
+ m := &Machine{ssh: ssh.ExeImpl{}, description: machine.Description{SSHUserIP: testUserIP}}
+ actual, ok := m.tryInterrogatingChromeOSDevice(ctx)
+ assert.True(t, ok)
+ assert.Equal(t, machine.ChromeOS{
+ Channel: "stable-channel",
+ Milestone: "89",
+ ReleaseVersion: "13729.56.0",
+ // No uptime reported
+ }, actual)
+}
+
+func TestTryInterrogatingChromeOS_NoChromeOSData_AssumesNotAttached(t *testing.T) {
+ unittest.SmallTest(t)
+ ctx := executil.FakeTestsContext(
+ "Test_FakeExe_SSHLSBRelease_ReturnsNonChromeOS",
+ )
+
+ m := &Machine{ssh: ssh.ExeImpl{}, description: machine.Description{SSHUserIP: testUserIP}}
+ _, ok := m.tryInterrogatingChromeOSDevice(ctx)
+ assert.False(t, ok)
+}
+
func TestInterrogate_NoDeviceAttached_Success(t *testing.T) {
unittest.SmallTest(t)
ctx := executil.FakeTestsContext(
- "Test_FakeExe_ExitCodeOne", // No android device
+ "Test_FakeExe_ExitCodeOne", // No Android device
)
m := &Machine{
@@ -110,7 +201,7 @@
func TestInterrogate_AndroidDeviceAttached_Success(t *testing.T) {
unittest.SmallTest(t)
ctx := executil.FakeTestsContext(
- "Test_FakeExe_Uptime_ReturnsPlaceholder",
+ "Test_FakeExe_ADBUptime_ReturnsPlaceholder",
"Test_FakeExe_AdbShellGetProp_ReturnsPlaceholder",
"Test_FakeExe_RawDumpSysBattery_ReturnsPlaceholder",
"Test_FakeExe_RawDumpSysThermal_ReturnsPlaceholder",
@@ -148,7 +239,50 @@
}, actual)
}
-func Test_FakeExe_Uptime_ReturnsPlaceholder(t *testing.T) {
+func TestInterrogate_ChromeOSDeviceAttached_Success(t *testing.T) {
+ unittest.SmallTest(t)
+ ctx := executil.FakeTestsContext(
+ "Test_FakeExe_SSHLSBRelease_ReturnsPlaceholder",
+ "Test_FakeExe_SSHUptime_ReturnsPlaceholder",
+ // We found a device, no need to check for adb
+ )
+
+ m := &Machine{
+ ssh: ssh.ExeImpl{},
+ MachineID: "some-machine",
+ Hostname: "some-hostname",
+ description: machine.Description{
+ SSHUserIP: testUserIP,
+ },
+ KubernetesImage: "deprecated",
+ Version: "some-version",
+ runningTask: true,
+ startSwarming: true,
+ startTime: time.Date(2021, time.September, 2, 2, 2, 2, 2, time.UTC),
+ interrogateTimer: noop.Float64SummaryMetric{},
+ }
+ actual := m.interrogate(ctx)
+ assert.Equal(t, machine.Event{
+ EventType: machine.EventTypeRawState,
+ LaunchedSwarming: true,
+ RunningSwarmingTask: true,
+ Host: machine.Host{
+ Name: "some-machine",
+ PodName: "some-hostname",
+ KubernetesImage: "deprecated",
+ Version: "some-version",
+ StartTime: time.Date(2021, time.September, 2, 2, 2, 2, 2, time.UTC),
+ },
+ ChromeOS: machine.ChromeOS{
+ Channel: "stable-channel",
+ Milestone: "89",
+ ReleaseVersion: "13729.56.0",
+ Uptime: 1234500 * time.Millisecond,
+ },
+ }, actual)
+}
+
+func Test_FakeExe_ADBUptime_ReturnsPlaceholder(t *testing.T) {
unittest.FakeExeTest(t)
if os.Getenv(executil.OverrideEnvironmentVariable) == "" {
return
@@ -202,6 +336,51 @@
os.Exit(0)
}
+func Test_FakeExe_SSHLSBRelease_ReturnsPlaceholder(t *testing.T) {
+ unittest.FakeExeTest(t)
+ if os.Getenv(executil.OverrideEnvironmentVariable) == "" {
+ return
+ }
+ // Check the input arguments to make sure they were as expected.
+ args := executil.OriginalArgs()
+ require.Contains(t, args, "ssh")
+ require.Contains(t, args, testUserIP)
+ require.Contains(t, args, "/etc/lsb-release")
+
+ fmt.Print(sampleChromeOSlsbrelease)
+ os.Exit(0)
+}
+
+func Test_FakeExe_SSHLSBRelease_ReturnsNonChromeOS(t *testing.T) {
+ unittest.FakeExeTest(t)
+ if os.Getenv(executil.OverrideEnvironmentVariable) == "" {
+ return
+ }
+ // Check the input arguments to make sure they were as expected.
+ args := executil.OriginalArgs()
+ require.Contains(t, args, "ssh")
+ require.Contains(t, args, testUserIP)
+ require.Contains(t, args, "/etc/lsb-release")
+
+ fmt.Print("FOO=bar")
+ os.Exit(0)
+}
+
+func Test_FakeExe_SSHUptime_ReturnsPlaceholder(t *testing.T) {
+ unittest.FakeExeTest(t)
+ if os.Getenv(executil.OverrideEnvironmentVariable) == "" {
+ return
+ }
+ // Check the input arguments to make sure they were as expected.
+ args := executil.OriginalArgs()
+ require.Contains(t, args, "ssh")
+ require.Contains(t, args, testUserIP)
+ require.Contains(t, args, "/proc/uptime")
+
+ fmt.Print(sampleChromeOSUptime)
+ os.Exit(0)
+}
+
func Test_FakeExe_ExitCodeOne(t *testing.T) {
unittest.FakeExeTest(t)
if os.Getenv(executil.OverrideEnvironmentVariable) == "" {
diff --git a/machine/go/test_machine_monitor/ssh/BUILD.bazel b/machine/go/test_machine_monitor/ssh/BUILD.bazel
new file mode 100644
index 0000000..a3b4bfd
--- /dev/null
+++ b/machine/go/test_machine_monitor/ssh/BUILD.bazel
@@ -0,0 +1,12 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+go_library(
+ name = "ssh",
+ srcs = ["ssh.go"],
+ importpath = "go.skia.org/infra/machine/go/test_machine_monitor/ssh",
+ visibility = ["//visibility:public"],
+ deps = [
+ "//go/executil",
+ "//go/skerr",
+ ],
+)
diff --git a/machine/go/test_machine_monitor/ssh/ssh.go b/machine/go/test_machine_monitor/ssh/ssh.go
new file mode 100644
index 0000000..c0dc97f
--- /dev/null
+++ b/machine/go/test_machine_monitor/ssh/ssh.go
@@ -0,0 +1,38 @@
+package ssh
+
+import (
+ "context"
+ "os/exec"
+ "time"
+
+ "go.skia.org/infra/go/executil"
+ "go.skia.org/infra/go/skerr"
+)
+
+const (
+ commandTimeout = 20 * time.Second // chosen arbitrarily
+)
+
+type SSH interface {
+ Run(ctx context.Context, userIP, cmd string, args ...string) (string, error)
+}
+
+// ExeImpl runs SSH via an executable that is assumed to be on the PATH.
+type ExeImpl struct{}
+
+func (e ExeImpl) Run(ctx context.Context, userIP, cmd string, args ...string) (string, error) {
+ ctx, cancel := context.WithTimeout(ctx, commandTimeout)
+ defer cancel()
+ xargs := append([]string{"-oConnectTimeout=15", "-oBatchMode=yes",
+ "-t", "-t", // These might not work on Windows
+ userIP, cmd}, args...)
+ cc := executil.CommandContext(ctx, "ssh", xargs...)
+ b, err := cc.Output()
+ if err != nil {
+ if ee, ok := err.(*exec.ExitError); ok {
+ err = skerr.Wrapf(err, "ssh failed with stderr: %q", ee.Stderr)
+ }
+ return "", err
+ }
+ return string(b), nil
+}