Add script to help clean up GCS files
Bug: skia:
Change-Id: I997f71ec1c975b636d54a6dbac40828b9482692c
Reviewed-on: https://skia-review.googlesource.com/c/181222
Commit-Queue: Kevin Lubick <kjlubick@google.com>
Reviewed-by: Ravi Mistry <rmistry@google.com>
diff --git a/scripts/gcr_image_cleanup/gcr_image_cleanup.go b/scripts/gcr_image_cleanup/gcr_image_cleanup.go
index 8aa51b8..cedbd22 100644
--- a/scripts/gcr_image_cleanup/gcr_image_cleanup.go
+++ b/scripts/gcr_image_cleanup/gcr_image_cleanup.go
@@ -35,7 +35,7 @@
os.Exit(1)
}
- oldestDate, err := time.Parse(YMD_FORMAT, *olderThan)
+ oldestDate, err := time.ParseInLocation(YMD_FORMAT, *olderThan, time.UTC)
if err != nil {
fmt.Println("Date must be in YYYY-MM-DD format")
os.Exit(1)
diff --git a/scripts/gcs_cleanup/gcs_cleanup.go b/scripts/gcs_cleanup/gcs_cleanup.go
new file mode 100644
index 0000000..56b244d
--- /dev/null
+++ b/scripts/gcs_cleanup/gcs_cleanup.go
@@ -0,0 +1,114 @@
+package main
+
+// This script will clean up files in GCS that are older
+// than the specified date.
+import (
+ "bufio"
+ "context"
+ "flag"
+ "fmt"
+ "log"
+ "os"
+ os_exec "os/exec"
+ "regexp"
+ "sync"
+ "time"
+
+ "go.skia.org/infra/go/exec"
+)
+
+var (
+ bucket = flag.String("bucket", "", "[REQUIRED] The GCP bucket to clean up.")
+ prefix = flag.String("prefix", "", "The prefix (directory) to clean in")
+ olderThan = flag.String("older_than", "", "[REQUIRED] Date in YYYY-MM-DD of the oldest file to keep.")
+ dryRun = flag.Bool("dry_run", false, "Print out those files that would be deleted instead of actually deleting them.")
+
+ deleteThreads = flag.Int("delete_threads", 16, "How many files to simultaneously delete")
+)
+
+const YMD_FORMAT = "2006-01-02"
+const NO_MORE_FILES = "NO_MORE_FILES"
+
+var fileLine = regexp.MustCompile(`^\s+\d+\s+(?P<date>\S+)\s+(?P<file>\S+)`)
+
+func main() {
+ flag.Parse()
+ if *bucket == "" || *olderThan == "" {
+ fmt.Println("--bucket and --older_than are required")
+ flag.PrintDefaults()
+ os.Exit(1)
+ }
+
+ oldestDate, err := time.ParseInLocation(YMD_FORMAT, *olderThan, time.UTC)
+ if err != nil {
+ fmt.Println("Date must be in YYYY-MM-DD format")
+ os.Exit(1)
+ }
+
+ search := fmt.Sprintf("%s/%s", *bucket, *prefix)
+ fmt.Printf("Searching for files in %s\n", search)
+
+ files := make(chan string, 10000)
+ wg := sync.WaitGroup{}
+ wg.Add(*deleteThreads)
+ for i := 0; i < *deleteThreads; i++ {
+ go deleteHelper(files, &wg)
+ }
+
+ cmd := os_exec.Command("gsutil", "ls", "-r", "-l", search)
+ stdout, err := cmd.StdoutPipe()
+ if err != nil {
+ log.Fatal(err)
+ }
+ if err := cmd.Start(); err != nil {
+ log.Fatal(err)
+ }
+ scanner := bufio.NewScanner(stdout)
+ for scanner.Scan() {
+ line := scanner.Text()
+ if match := fileLine.FindStringSubmatch(line); match != nil {
+ // match[1] is the date, formatted like 2016-12-08T05:00:29Z
+ // match[2] is the file name
+ d, err := time.ParseInLocation(time.RFC3339, match[1], time.UTC)
+ if err != nil {
+ fmt.Printf("Invalid date in line %s, %s\n", line, err)
+ continue
+ }
+ if d.Before(oldestDate) {
+ files <- match[2]
+ }
+ }
+ }
+ if err = cmd.Wait(); err != nil {
+ fmt.Printf("Listing failed, going to finish deleting files: %s\n", err)
+ }
+ fmt.Printf("Enumerated all files, waiting to delete %d more files\n", len(files))
+ for i := 0; i < *deleteThreads; i++ {
+ files <- NO_MORE_FILES
+ }
+ wg.Wait()
+ fmt.Println("done")
+}
+
+func deleteHelper(files chan string, wg *sync.WaitGroup) {
+ defer wg.Done()
+ for {
+ toDelete := <-files
+ if toDelete == NO_MORE_FILES {
+ return
+ }
+ if *dryRun {
+ fmt.Printf("dry deleted %s\n", toDelete)
+ } else {
+ err := exec.Run(context.Background(), &exec.Command{
+ Name: "gsutil",
+ Args: []string{"rm", toDelete},
+ })
+ if err != nil {
+ fmt.Printf("Could not delete %s: %s\n", toDelete, err)
+ } else {
+ fmt.Printf("Deleted %s\n", toDelete)
+ }
+ }
+ }
+}