[ansible] Deploy test_machine_monitor to Win machines.

Most of the change is refactoring
copy_service_account_key and install_test_machine_monitor
to handle non-posix machines.

Also remove the old all_linux: group in hosts.yml, which was
never used, and re-purpose the name for all Linux test machines.

Change-Id: Idd1b5bd0a2827cf339b9b4e4199521d40ef7eda1
Reviewed-on: https://skia-review.googlesource.com/c/buildbot/+/446337
Reviewed-by: Erik Rose <erikrose@google.com>
diff --git a/machine/Makefile b/machine/Makefile
index 0bbea4e..60774ad 100644
--- a/machine/Makefile
+++ b/machine/Makefile
@@ -119,4 +119,13 @@
 	  -ldflags="-X 'main.Version=${VERSION}'" \
 	 ./go/test_machine_monitor
 
+build_test_machine_monitor_64-bit_Win32NT:
+	CGO_ENABLED=0 \
+	GOOS=windows \
+	GOARCH=amd64 \
+	go build \
+	 -o ./build/Win32NT/64-bit/test_machine_monitor \
+	  -ldflags="-X 'main.Version=${VERSION}'" \
+	 ./go/test_machine_monitor
+
 include ../make/npm.mk
diff --git a/skolo/ansible/hosts.yml b/skolo/ansible/hosts.yml
index 5113f06..7d5bb54 100644
--- a/skolo/ansible/hosts.yml
+++ b/skolo/ansible/hosts.yml
@@ -80,7 +80,7 @@
         install_test_machine_monitor__run_under_desktop: False
         copy_authorized_keys__jumphost: rack4
 
-    linux:
+    all_linux:
       children:
         rack2_linux:
         rack3_linux:
@@ -283,14 +283,6 @@
       hosts:
         skia-i-gapid-[001:004]:
 
-    all_linux:
-      children:
-        jumphosts:
-        rack2_linux:
-        rack3_linux:
-        rack4_rpis:
-        rack5_linux:
-
     all_mac:
       children:
         rack2_mac:
diff --git a/skolo/ansible/switchboard/install_test_machine_monitor.yml b/skolo/ansible/switchboard/install_test_machine_monitor.yml
index 40606dc..18f9a79 100644
--- a/skolo/ansible/switchboard/install_test_machine_monitor.yml
+++ b/skolo/ansible/switchboard/install_test_machine_monitor.yml
@@ -1,5 +1,5 @@
 # Installs test_machine_monitor.
-- hosts: switchboard_rpis,gapid_test_machines,linux
+- hosts: switchboard_rpis,gapid_test_machines,all_linux,all_mac,all_win
   user: chrome-bot
   gather_facts: true
 
diff --git a/skolo/ansible/switchboard/roles/copy_service_account_key/tasks/main.yml b/skolo/ansible/switchboard/roles/copy_service_account_key/tasks/main.yml
index d45d413..51a24c0 100644
--- a/skolo/ansible/switchboard/roles/copy_service_account_key/tasks/main.yml
+++ b/skolo/ansible/switchboard/roles/copy_service_account_key/tasks/main.yml
@@ -18,10 +18,10 @@
     creates:
       '{{ service_account_key.path }}/application_default_credentials.json'
 
-- name: Copy service account key
-  copy:
-    src: '{{ service_account_key.path }}/application_default_credentials.json'
-    dest: ~{{ skolo_account }}/.config/gcloud/
-    owner: '{{ skolo_account }}'
-    group: '{{ skolo_group }}'
-    mode: '0644'
+- name: Copy service account key posix.
+  import_tasks: posix.yml
+  when: ansible_facts['system']|lower in ['linux', 'darwin']
+
+- name: Copy service account key win.
+  import_tasks: win.yml
+  when: ansible_facts['system']|lower == 'win32nt'
diff --git a/skolo/ansible/switchboard/roles/copy_service_account_key/tasks/posix.yml b/skolo/ansible/switchboard/roles/copy_service_account_key/tasks/posix.yml
new file mode 100644
index 0000000..e2427de
--- /dev/null
+++ b/skolo/ansible/switchboard/roles/copy_service_account_key/tasks/posix.yml
@@ -0,0 +1,17 @@
+- name: Ensure directory for default credentials exists on posix machine.
+  file:
+    path: ~{{ skolo_account }}/.config/gcloud/
+    state: directory
+    owner: '{{ skolo_account }}'
+    group: '{{ skolo_group }}'
+    mode: '0755'
+
+# See https://pkg.go.dev/golang.org/x/oauth2/google#FindDefaultCredentialsWithParams for
+# more details on the default locations that the golang library looks for credentials.
+- name: Copy service account key to posix machine.
+  copy:
+    src: '{{ service_account_key.path }}/application_default_credentials.json'
+    dest: ~{{ skolo_account }}/.config/gcloud/
+    owner: '{{ skolo_account }}'
+    group: '{{ skolo_group }}'
+    mode: '0644'
diff --git a/skolo/ansible/switchboard/roles/copy_service_account_key/tasks/win.yml b/skolo/ansible/switchboard/roles/copy_service_account_key/tasks/win.yml
new file mode 100644
index 0000000..6900ee6
--- /dev/null
+++ b/skolo/ansible/switchboard/roles/copy_service_account_key/tasks/win.yml
@@ -0,0 +1,13 @@
+# We store the credentials file at ${APPDATA}\gcloud\application_default_credentials.json
+#
+# See https://pkg.go.dev/golang.org/x/oauth2/google#FindDefaultCredentialsWithParams for
+# more details.
+- name: Ensure directory for default credentials exists on Windows machine.
+  win_file:
+    state: directory
+    path: '{{ ansible_env.APPDATA }}\gcloud\'
+
+- name: Copy service account key to Windows machine.
+  win_copy:
+    dest: '{{ ansible_env.APPDATA }}\gcloud\'
+    src: '{{ service_account_key.path }}/application_default_credentials.json'
diff --git a/skolo/ansible/switchboard/roles/install_test_machine_monitor/tasks/linux.yml b/skolo/ansible/switchboard/roles/install_test_machine_monitor/tasks/linux.yml
index 1e08719..0a6eb96 100644
--- a/skolo/ansible/switchboard/roles/install_test_machine_monitor/tasks/linux.yml
+++ b/skolo/ansible/switchboard/roles/install_test_machine_monitor/tasks/linux.yml
@@ -1,3 +1,14 @@
+- name: Copy over executable.
+  become: yes
+  copy:
+    src:
+      "{{ all.repo_root }}/machine/build/{{ ansible_facts['system'] }}/{{
+      ansible_facts['architecture'] }}/test_machine_monitor"
+    dest: /usr/local/bin/test_machine_monitor
+    owner: root
+    group: root
+    mode: '0755'  # Quoted so YAML cannot re-type the octal mode as an integer.
+
 - name: Check that ~chrome-bot/.config/autostart/swarming.desktop exists
   stat:
     path: ~chrome-bot/.config/autostart/swarming.desktop
diff --git a/skolo/ansible/switchboard/roles/install_test_machine_monitor/tasks/mac.yml b/skolo/ansible/switchboard/roles/install_test_machine_monitor/tasks/mac.yml
index 2b9c7ce..cd2612b 100644
--- a/skolo/ansible/switchboard/roles/install_test_machine_monitor/tasks/mac.yml
+++ b/skolo/ansible/switchboard/roles/install_test_machine_monitor/tasks/mac.yml
@@ -1,7 +1,19 @@
+- name: Copy over executable.
+  become: yes
+  copy:
+    src:
+      "{{ all.repo_root }}/machine/build/{{ ansible_facts['system'] }}/{{
+      ansible_facts['architecture'] }}/test_machine_monitor"
+    dest: /usr/local/bin/test_machine_monitor
+    owner: root
+    group: wheel
+    mode: '0755'  # Quoted so YAML cannot re-type the octal mode as an integer.
+
 - name: Remove any freestanding Swarming install.
   when: install_test_machine_monitor__start_swarming
   vars:
-    swarming_launch_plist: ~{{ skolo_account }}/Library/LaunchAgents/org.swarm.bot.plist
+    swarming_launch_plist:
+      ~{{ skolo_account }}/Library/LaunchAgents/org.swarm.bot.plist
   block:
     - name: Stop freestanding startup job.
       # This is idempotent and returns 0, even though it prints an error
@@ -17,7 +29,9 @@
 
 - name: Install test_machine_monitor.
   vars:
-    tmm_launch_plist: ~{{ skolo_account }}/Library/LaunchAgents/com.google.skia.test_machine_monitor.plist
+    tmm_launch_plist:
+      ~{{ skolo_account
+      }}/Library/LaunchAgents/com.google.skia.test_machine_monitor.plist
   block:
     - name: Install startup job.
       template:
diff --git a/skolo/ansible/switchboard/roles/install_test_machine_monitor/tasks/main.yml b/skolo/ansible/switchboard/roles/install_test_machine_monitor/tasks/main.yml
index 0786df1..073d44a 100644
--- a/skolo/ansible/switchboard/roles/install_test_machine_monitor/tasks/main.yml
+++ b/skolo/ansible/switchboard/roles/install_test_machine_monitor/tasks/main.yml
@@ -1,7 +1,7 @@
 - name: Fail if platform unsupported.
   fail:
     msg: This platform is not yet supported.
-  when: "ansible_facts['system']|lower not in ['darwin', 'linux']"
+  when: "ansible_facts['system']|lower not in ['darwin', 'linux', 'win32nt']"
 
 - name: Build test_machine_monitor for the target machine.
   delegate_to: 127.0.0.1
@@ -11,17 +11,6 @@
       "build_test_machine_monitor_{{ ansible_facts['architecture'] }}_{{
       ansible_facts['system'] }}"
 
-- name: Copy over executable.
-  become: yes
-  copy:
-    src:
-      "{{ all.repo_root }}/machine/build/{{ ansible_facts['system'] }}/{{
-      ansible_facts['architecture'] }}/test_machine_monitor"
-    dest: /usr/local/bin/test_machine_monitor
-    owner: root
-    group: "{{ (ansible_facts['system']|lower == 'darwin') | ternary('wheel', 'root') }}"
-    mode: 0755
-
 - name: Install startup job for Linux.
   import_tasks: linux.yml
   when: ansible_facts['system']|lower == 'linux'
@@ -29,3 +18,7 @@
 - name: Install startup job for Mac.
   import_tasks: mac.yml
   when: ansible_facts['system']|lower == 'darwin'
+
+- name: Install startup job for Win.
+  import_tasks: win.yml
+  when: ansible_facts['system']|lower == 'win32nt'
diff --git a/skolo/ansible/switchboard/roles/install_test_machine_monitor/tasks/win.yml b/skolo/ansible/switchboard/roles/install_test_machine_monitor/tasks/win.yml
new file mode 100644
index 0000000..51085bd
--- /dev/null
+++ b/skolo/ansible/switchboard/roles/install_test_machine_monitor/tasks/win.yml
@@ -0,0 +1,50 @@
+- name: Create ~/bin directory if it doesn't exist.
+  ansible.windows.win_file:
+    state: directory
+    path: 'C:\Users\{{ skolo_account }}\bin'
+
+- name: Copy over executable.
+  ansible.windows.win_copy:
+    src:
+      "{{ all.repo_root }}/machine/build/{{ ansible_facts['system'] }}/{{
+      ansible_facts['architecture'] }}/test_machine_monitor"
+    # Windows target, hence win_copy. The file lands as "...2.exe"; the
+    # PowerShell script later moves it over "test_machine_monitor.exe".
+    dest: "C:\\Users\\{{ skolo_account }}\\bin\\test_machine_monitor2.exe"
+
+- name: Copy over PowerShell script that launches test_machine_monitor.
+  template:
+    dest: 'C:\Users\{{ skolo_account }}\bin\test_machine_monitor.ps1'
+    src: templates/test_machine_monitor.ps1
+
+# This task schedules the PowerShell script, not the executable itself. The
+# script can swap in an updated executable before launching it, which cannot
+# be done while the exe is running.
+- name: Schedule test_machine_monitor for startup.
+  community.windows.win_scheduled_task:
+    name: test_machine_monitor
+    description: Run test_machine_monitor.
+    actions:
+      # Runs test_machine_monitor.ps1, which moves test_machine_monitor2.exe
+      # over test_machine_monitor.exe (if present) and then launches it.
+      # NOTE(review): assumes ansible_env.SHELL is PowerShell -- confirm.
+      - path: '{{ ansible_env.SHELL }}'
+        arguments:
+          -executionpolicy bypass "C:\\Users\\{{ skolo_account
+          }}\\bin\\test_machine_monitor.ps1"
+        working_directory: "C:\\Users\\{{ skolo_account }}\\bin\\"
+    triggers:
+      - type: logon
+    logon_type: interactive_token
+    enabled: yes
+    username: '{{ skolo_account }}'
+    hidden: no
+    # 2 -> Will not start a new instance if another is running.
+    multiple_instances: 2
+    state: present
+    path: \Skia
+    restart_count: 1000
+    # ISO 8601 Duration format for one minute.
+    restart_interval: PT1M
+    # ISO 8601 Duration format for no limit.
+    execution_time_limit: PT0S
diff --git a/skolo/ansible/switchboard/roles/install_test_machine_monitor/templates/test_machine_monitor.ps1 b/skolo/ansible/switchboard/roles/install_test_machine_monitor/templates/test_machine_monitor.ps1
new file mode 100644
index 0000000..883fb32
--- /dev/null
+++ b/skolo/ansible/switchboard/roles/install_test_machine_monitor/templates/test_machine_monitor.ps1
@@ -0,0 +1,34 @@
+# Launches the test_machine_monitor executable, but not before checking if there
+# is an updated executable.
+#
+# Background: On Windows you can't delete or overwrite an executable that is
+# running, so we always write new executables to test_machine_monitor2.exe and
+# then this script, which only runs when test_machine_monitor.exe is not
+# running, can then overwrite test_machine_monitor.exe with
+# test_machine_monitor2.exe.
+
+$newfile = '.\test_machine_monitor2.exe'
+$oldfile = '.\test_machine_monitor.exe'
+
+# If an updated executable has been staged, move it over test_machine_monitor.exe.
+if (Test-Path -Path $newfile -PathType Leaf) {
+    # Remove the old one if it exists.
+    if (Test-Path -Path $oldfile -PathType Leaf) {
+        Remove-Item -Path $oldfile -Force -ErrorAction Stop
+        Write-Host "The file [$oldfile] has been deleted."
+    }
+
+    # Install the staged executable; stop on failure so success is not misreported.
+    Move-Item -Path $newfile -Destination $oldfile -ErrorAction Stop
+    Write-Host "[$oldfile] has been replaced by [$newfile]."
+}
+else {
+    # No staged update, so the existing executable is launched unchanged.
+    Write-Host "Running existing [$oldfile], no newer version found."
+}
+
+# Launch test_machine_monitor.
+.\test_machine_monitor.exe `
+  --config=prod.json `
+  --prom_port=:{{ all.prometheus.monitoring.ports.test_machine_monitor}} `
+  --username=chrome-bot
\ No newline at end of file
diff --git a/skolo/ansible/switchboard/win.yml b/skolo/ansible/switchboard/win.yml
new file mode 100644
index 0000000..e869eb0
--- /dev/null
+++ b/skolo/ansible/switchboard/win.yml
@@ -0,0 +1,8 @@
+- hosts: all_win
+  user: chrome-bot
+  gather_facts: true  # The roles branch on ansible_facts['system'].
+
+  roles:
+    - copy_authorized_keys
+    - copy_service_account_key
+    - install_test_machine_monitor
diff --git a/switchboard/win.yml b/switchboard/win.yml
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/switchboard/win.yml