| // Copyright 2022 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| // This task driver takes a binary (e.g. "dm") built by a Build-* task (e.g. |
| // "Build-Debian10-Clang-x86_64-Release"), runs Bloaty against the binary, and uploads the resulting |
| // code size statistics to the GCS bucket belonging to the https://codesize.skia.org service. |
| // |
| // When running as a tryjob, this task driver performs a size diff of said binary built at the |
| // tryjob's changelist/patchset vs. built at tip-of-tree. The binary built at tip-of-tree is |
| // produced by a *-NoPatch task (e.g. "Build-Debian10-Clang-x86_64-Release-NoPatch"), whereas the |
| // binary built at the tryjob's changelist/patchset is produced by a task of the same name except |
| // without the "-NoPatch" suffix (e.g. "Build-Debian10-Clang-x86_64-Release"). The size diff is |
| // calculated using Bloaty, see |
| // https://github.com/google/bloaty/blob/f01ea59bdda11708d74a3826c23d6e2db6c996f0/doc/using.md#size-diffs. |
| // The resulting diff is uploaded to the GCS bucket belonging to the https://codesize.skia.org |
| // service. |
| package main |
| |
| import ( |
| "context" |
| "encoding/json" |
| "flag" |
| "fmt" |
| "os" |
| "path/filepath" |
| "strconv" |
| "strings" |
| "time" |
| |
| "cloud.google.com/go/storage" |
| "google.golang.org/api/option" |
| |
| "go.skia.org/infra/go/auth" |
| "go.skia.org/infra/go/exec" |
| "go.skia.org/infra/go/gcs" |
| "go.skia.org/infra/go/gcs/gcsclient" |
| "go.skia.org/infra/go/gerrit" |
| "go.skia.org/infra/go/gitiles" |
| "go.skia.org/infra/go/now" |
| "go.skia.org/infra/go/skerr" |
| "go.skia.org/infra/perf/go/ingest/format" |
| "go.skia.org/infra/task_driver/go/lib/auth_steps" |
| "go.skia.org/infra/task_driver/go/lib/checkout" |
| "go.skia.org/infra/task_driver/go/lib/os_steps" |
| "go.skia.org/infra/task_driver/go/td" |
| "go.skia.org/infra/task_scheduler/go/types" |
| ) |
| |
// GCS bucket names and the task driver URL prefix used by this task driver.
const (
	// codesizeGCSBucketName is the bucket that receives the Bloaty output, the Bloaty diff output
	// and the JSON metadata file.
	codesizeGCSBucketName = "skia-codesize"

	// perfGCSBucketName is the bucket that receives the JSON file with the binary size
	// measurements for Perf.
	perfGCSBucketName = "skia-perf"

	// taskdriverURL is the prefix to which a task ID is appended to build the "full_data" link
	// included in the Perf data.
	taskdriverURL = "https://task-driver.skia.org/td/"
)
| |
// BloatyOutputMetadata describes a single Bloaty invocation: which Bloaty version and
// command-line arguments were used, which task ran it, and which commit or changelist the
// analyzed binary was built from. It is serialized as JSON and uploaded to GCS next to the Bloaty
// output file.
//
// The order of the fields determines the order of the keys in the serialized JSON.
//
// TODO(lovisolo): Move this struct to the buildbot repository.
type BloatyOutputMetadata struct {
	// Version is the schema version of this file, starting at 1.
	Version int `json:"version"`
	// Timestamp is the time at which the metadata was produced.
	Timestamp string `json:"timestamp"`

	// Swarming task that executed this task driver.
	SwarmingTaskID string `json:"swarming_task_id"`
	SwarmingServer string `json:"swarming_server"`

	// ID and name of this task, and name of the compile task that produced the analyzed binary.
	TaskID          string `json:"task_id"`
	TaskName        string `json:"task_name"`
	CompileTaskName string `json:"compile_task_name"`
	// CompileTaskNameNoPatch is the name of the *-NoPatch compile task that produced the binary
	// to diff against. It is left out of the JSON when empty.
	CompileTaskNameNoPatch string `json:"compile_task_name_no_patch,omitempty"`
	// BinaryName is the name of the analyzed binary (e.g. "dm").
	BinaryName string `json:"binary_name"`

	// Version of the "bloaty" CIPD package and arguments passed to Bloaty.
	BloatyCipdVersion string   `json:"bloaty_cipd_version"`
	BloatyArgs        []string `json:"bloaty_args"`
	// BloatyDiffArgs holds the arguments of the Bloaty size diff invocation. It is left out of
	// the JSON when empty.
	BloatyDiffArgs []string `json:"bloaty_diff_args,omitempty"`

	// Repository state the binary was built at. The patch fields are copied verbatim from the
	// repo state, so they are empty strings when it carries no patch.
	PatchIssue  string `json:"patch_issue"`
	PatchServer string `json:"patch_server"`
	PatchSet    string `json:"patch_set"`
	Repo        string `json:"repo"`
	Revision    string `json:"revision"`

	// Timestamp, author and subject of the commit or changelist under analysis.
	CommitTimestamp string `json:"commit_timestamp"`
	Author          string `json:"author"`
	Subject         string `json:"subject"`
}
| |
| func main() { |
| var ( |
| projectID = flag.String("project_id", "", "ID of the Google Cloud project.") |
| taskID = flag.String("task_id", "", "ID of this task.") |
| taskName = flag.String("task_name", "", "Name of the task.") |
| compileTaskName = flag.String("compile_task_name", "", "Name of the compile task that produced the binary to analyze.") |
| compileTaskNameNoPatch = flag.String("compile_task_name_no_patch", "", "Name of the *-NoPatch compile task that produced the binary to diff against (ignored when the task is not a tryjob).") |
| binaryName = flag.String("binary_name", "", "Name of the binary to analyze (e.g. \"dm\").") |
| bloatyCIPDVersion = flag.String("bloaty_cipd_version", "", "Version of the \"bloaty\" CIPD package used.") |
| bloatyBinary = flag.String("bloaty_binary", "", "Path to the bloaty binary.") |
| stripBinary = flag.String("strip_binary", "", "Path to the strip binary (part of binutils).") |
| output = flag.String("o", "", "If provided, dump a JSON blob of step data to the given file. Prints to stdout if '-' is given.") |
| local = flag.Bool("local", true, "True if running locally (as opposed to on the bots).") |
| |
| checkoutFlags = checkout.SetupFlags(nil) |
| ) |
| ctx := td.StartRun(projectID, taskID, taskName, output, local) |
| defer td.EndRun(ctx) |
| |
| if *bloatyBinary == "" || *stripBinary == "" { |
| td.Fatal(ctx, skerr.Fmt("Must specify --bloaty_binary and --strip_binary")) |
| } |
| |
| // The repository state contains the commit hash and patch/patchset if available. |
| repoState, err := checkout.GetRepoState(checkoutFlags) |
| if err != nil { |
| td.Fatal(ctx, skerr.Wrap(err)) |
| } |
| |
| // Make an HTTP client with the required permissions to hit GCS, Gerrit and Gitiles. |
| httpClient, _, err := auth_steps.InitHttpClient(ctx, *local, auth.ScopeReadWrite, gerrit.AuthScope, auth.ScopeUserinfoEmail) |
| if err != nil { |
| td.Fatal(ctx, skerr.Wrap(err)) |
| } |
| |
| // Make a GCS client with the required permissions to upload to the codesize.skia.org GCS bucket. |
| store, err := storage.NewClient(ctx, option.WithHTTPClient(httpClient)) |
| if err != nil { |
| td.Fatal(ctx, skerr.Wrap(err)) |
| } |
| codesizeGCS := gcsclient.New(store, codesizeGCSBucketName) |
| perfGCS := gcsclient.New(store, perfGCSBucketName) |
| |
| // Make a Gerrit client. |
| gerritClient, err := gerrit.NewGerrit(repoState.Server, httpClient) |
| if err != nil { |
| td.Fatal(ctx, skerr.Wrap(err)) |
| } |
| |
| // Make a Gitiles client. |
| gitilesRepo := gitiles.NewRepo(repoState.Repo, httpClient) |
| |
| args := runStepsArgs{ |
| repoState: repoState, |
| gerrit: gerritClient, |
| gitilesRepo: gitilesRepo, |
| codesizeGCS: codesizeGCS, |
| perfGCS: perfGCS, |
| swarmingTaskID: os.Getenv("SWARMING_TASK_ID"), |
| swarmingServer: os.Getenv("SWARMING_SERVER"), |
| taskID: *taskID, |
| taskName: *taskName, |
| compileTaskName: *compileTaskName, |
| compileTaskNameNoPatch: *compileTaskNameNoPatch, |
| binaryName: *binaryName, |
| bloatyPath: *bloatyBinary, |
| bloatyCIPDVersion: *bloatyCIPDVersion, |
| stripPath: *stripBinary, |
| } |
| |
| if err := runSteps(ctx, args); err != nil { |
| td.Fatal(ctx, skerr.Wrap(err)) |
| } |
| } |
| |
| // runStepsArgs contains the input arguments to the runSteps function. |
| type runStepsArgs struct { |
| repoState types.RepoState |
| gerrit *gerrit.Gerrit |
| gitilesRepo gitiles.GitilesRepo |
| codesizeGCS gcs.GCSClient |
| perfGCS gcs.GCSClient |
| swarmingTaskID string |
| swarmingServer string |
| taskID string |
| taskName string |
| compileTaskName string |
| compileTaskNameNoPatch string |
| binaryName string |
| bloatyCIPDVersion string |
| bloatyPath string |
| stripPath string |
| } |
| |
| // runSteps runs the main steps of this task driver. |
| func runSteps(ctx context.Context, args runStepsArgs) error { |
| var ( |
| author string |
| subject string |
| commitTimestamp string |
| ) |
| |
| // Read the CL subject, author and timestamp. We talk to Gerrit when running as a tryjob, or to |
| // Gitiles when running as a post-submit task. |
| if args.repoState.IsTryJob() { |
| issue, err := strconv.ParseInt(args.repoState.Issue, 10, 64) |
| if err != nil { |
| return skerr.Wrap(err) |
| } |
| patchset, err := strconv.ParseInt(args.repoState.Patchset, 10, 64) |
| if err != nil { |
| return skerr.Wrap(err) |
| } |
| changeInfo, err := args.gerrit.GetIssueProperties(ctx, issue) |
| if err != nil { |
| return skerr.Wrap(err) |
| } |
| // This matches the format of the author field returned by Gitiles. |
| author = fmt.Sprintf("%s (%s)", changeInfo.Owner.Name, changeInfo.Owner.Email) |
| subject = changeInfo.Subject |
| for _, revision := range changeInfo.Revisions { |
| if revision.Number == patchset { |
| commitTimestamp = revision.CreatedString |
| break |
| } |
| } |
| } else { |
| longCommit, err := args.gitilesRepo.Details(ctx, args.repoState.Revision) |
| if err != nil { |
| return skerr.Wrap(err) |
| } |
| author = longCommit.Author |
| subject = longCommit.Subject |
| commitTimestamp = longCommit.Timestamp.Format(time.RFC3339) |
| } |
| |
| // Run Bloaty and capture its output. |
| bloatyOutput, bloatyArgs, err := runBloaty(ctx, args.stripPath, args.bloatyPath, args.binaryName) |
| if err != nil { |
| return skerr.Wrap(err) |
| } |
| |
| // Build metadata structure. |
| metadata := &BloatyOutputMetadata{ |
| Version: 1, |
| Timestamp: now.Now(ctx).UTC().Format(time.RFC3339), |
| SwarmingTaskID: args.swarmingTaskID, |
| SwarmingServer: args.swarmingServer, |
| TaskID: args.taskID, |
| TaskName: args.taskName, |
| CompileTaskName: args.compileTaskName, |
| BinaryName: args.binaryName, |
| BloatyCipdVersion: args.bloatyCIPDVersion, |
| BloatyArgs: bloatyArgs, |
| PatchIssue: args.repoState.Issue, |
| PatchServer: args.repoState.Server, |
| PatchSet: args.repoState.Patchset, |
| Repo: args.repoState.Repo, |
| Revision: args.repoState.Revision, |
| CommitTimestamp: commitTimestamp, |
| Author: author, |
| Subject: subject, |
| } |
| |
| var bloatyDiffOutput string |
| // Diff the binary built at the current changelist/patchset vs. at tip-of-tree. |
| bloatyDiffOutput, metadata.BloatyDiffArgs, err = runBloatyDiff(ctx, args.stripPath, args.bloatyPath, args.binaryName) |
| if err != nil { |
| return skerr.Wrap(err) |
| } |
| metadata.CompileTaskNameNoPatch = args.compileTaskNameNoPatch |
| |
| gcsDir := computeTargetGCSDirectory(ctx, args.repoState, args.taskID, args.compileTaskName) |
| |
| // Upload pretty-printed JSON metadata file to GCS. |
| jsonMetadata, err := json.MarshalIndent(metadata, "", " ") |
| if err != nil { |
| return skerr.Wrap(err) |
| } |
| if err = uploadFileToGCS(ctx, args.codesizeGCS, fmt.Sprintf("%s/%s.json", gcsDir, args.binaryName), jsonMetadata); err != nil { |
| return skerr.Wrap(err) |
| } |
| |
| // Upload Bloaty diff output plain-text file to GCS. |
| if err = uploadFileToGCS(ctx, args.codesizeGCS, fmt.Sprintf("%s/%s.diff.txt", gcsDir, args.binaryName), []byte(bloatyDiffOutput)); err != nil { |
| return skerr.Wrap(err) |
| } |
| |
| // Upload Bloaty output .tsv file to GCS. |
| // |
| // It is important that we upload the .tsv file last because the codesizeserver binary will |
| // only start processing the .json and .diff.txt files once it receives the Pub/Sub |
| // notification that a .tsv file has been uploaded. Pub/Sub notifications are pretty quick, so |
| // by uploading files in this order we avoid a race condition. |
| if err = uploadFileToGCS(ctx, args.codesizeGCS, fmt.Sprintf("%s/%s.tsv", gcsDir, args.binaryName), []byte(bloatyOutput)); err != nil { |
| return skerr.Wrap(err) |
| } |
| if args.repoState.IsTryJob() { |
| // Add VM and file diff results to the step data. This is consumed by the codesize plugin |
| // to display results on the Gerrit CL for tryjob runs. |
| vmDiff, fileDiff := parseBloatyDiffOutput(bloatyDiffOutput) |
| if vmDiff != "" && fileDiff != "" { |
| td.StepText(ctx, "VM Diff", vmDiff) |
| td.StepText(ctx, "File Diff", fileDiff) |
| } |
| |
| // TODO(rmistry): Remove the below "Diff Bytes" section after the above |
| // works and is integrated with the codesize plugin. |
| s, err := os_steps.Stat(ctx, filepath.Join("build", args.binaryName+"_stripped")) |
| if err != nil { |
| return err |
| } |
| totalBytes := s.Size() |
| |
| s, err = os_steps.Stat(ctx, filepath.Join("build_nopatch", args.binaryName+"_stripped")) |
| if err != nil { |
| return err |
| } |
| beforeBytes := s.Size() |
| |
| diffBytes := totalBytes - beforeBytes |
| td.StepText(ctx, "Diff Bytes", strconv.FormatInt(diffBytes, 10)) |
| } else { |
| // Upload perf data for non-tryjob runs on status.skia.org. |
| perfData := format.Format{ |
| Version: 1, |
| GitHash: args.repoState.Revision, |
| Key: map[string]string{ |
| "binary": args.binaryName, |
| "compile_task_name": args.compileTaskName, |
| }, |
| Links: map[string]string{ |
| "full_data": taskdriverURL + args.taskID, |
| }, |
| } |
| if err = uploadPerfData(ctx, args.perfGCS, gcsDir, args.binaryName, args.taskID, perfData); err != nil { |
| return skerr.Wrap(err) |
| } |
| } |
| |
| return nil |
| } |
| |
// parseBloatyDiffOutput extracts the VM size diff and the file size diff from the plain-text
// output of a Bloaty size diff.
//
// The last non-blank line of the output is expected to be the totals line, made of exactly five
// whitespace-separated fields, e.g.:
//
//	+0.0% +832 TOTAL +848Ki +0.0%
//
// for which ("+832", "+848Ki") is returned, i.e. the second and fourth fields. Leading and
// trailing whitespace around the output, including blank or whitespace-only trailing lines and
// "\r\n" line endings, is ignored.
//
// If the last non-blank line does not have exactly five fields, two empty strings are returned.
func parseBloatyDiffOutput(bloatyDiffOutput string) (string, string) {
	// strings.Split never returns an empty slice for a non-empty separator, so indexing the last
	// element is always safe.
	lines := strings.Split(strings.TrimSpace(bloatyDiffOutput), "\n")
	fields := strings.Fields(lines[len(lines)-1])
	if len(fields) != 5 {
		return "", ""
	}
	return fields[1], fields[3]
}
| |
| // runBloaty runs Bloaty against the given binary and returns the Bloaty output in TSV format and |
| // the Bloaty command-line arguments used. It uses the strip command to strip out debug symbols, |
| // so they do not inflate the file size numbers. |
| func runBloaty(ctx context.Context, stripPath, bloatyPath, binaryName string) (string, []string, error) { |
| binaryWithSymbols := filepath.Join("build", binaryName) |
| binaryNoSymbols := filepath.Join("build", binaryName+"_stripped") |
| err := td.Do(ctx, td.Props("Create stripped version of binary"), func(ctx context.Context) error { |
| runCmd := &exec.Command{ |
| Name: "cp", |
| Args: []string{binaryWithSymbols, binaryNoSymbols}, |
| InheritEnv: true, |
| LogStdout: true, |
| LogStderr: true, |
| } |
| _, err := exec.RunCommand(ctx, runCmd) |
| if err != nil { |
| return skerr.Wrap(err) |
| } |
| runCmd = &exec.Command{ |
| Name: stripPath, |
| Args: []string{binaryNoSymbols}, |
| InheritEnv: true, |
| LogStdout: true, |
| LogStderr: true, |
| } |
| _, err = exec.RunCommand(ctx, runCmd) |
| if err != nil { |
| return skerr.Wrap(err) |
| } |
| runCmd = &exec.Command{ |
| Name: "ls", |
| Args: []string{"-al", "build"}, |
| InheritEnv: true, |
| LogStdout: true, |
| LogStderr: true, |
| } |
| _, err = exec.RunCommand(ctx, runCmd) |
| if err != nil { |
| return skerr.Wrap(err) |
| } |
| |
| return nil |
| }) |
| if err != nil { |
| return "", nil, skerr.Wrap(err) |
| } |
| |
| runCmd := &exec.Command{ |
| Name: bloatyPath, |
| Args: []string{ |
| binaryNoSymbols, |
| "-d", |
| "compileunits,symbols", |
| "-n", |
| "0", |
| "--tsv", |
| "--debug-file=" + binaryWithSymbols, |
| }, |
| InheritEnv: true, |
| LogStdout: true, |
| LogStderr: true, |
| } |
| |
| var bloatyOutput string |
| |
| if err := td.Do(ctx, td.Props(fmt.Sprintf("Run Bloaty against binary %q", binaryName)), func(ctx context.Context) error { |
| bloatyOutput, err = exec.RunCommand(ctx, runCmd) |
| return err |
| }); err != nil { |
| return "", nil, skerr.Wrap(err) |
| } |
| |
| return bloatyOutput, runCmd.Args, nil |
| } |
| |
| // runBloatyDiff invokes Bloaty to diff the given binary built at the current changelist/patchset |
| // vs. at tip of tree, and returns the plain-text Bloaty output and the command-line arguments |
| // used. Like before, it strips the debug symbols out before computing that diff. |
| func runBloatyDiff(ctx context.Context, stripPath, bloatyPath, binaryName string) (string, []string, error) { |
| // These were created from the runBloaty step |
| binaryWithPatchWithSymbols := filepath.Join("build", binaryName) |
| binaryWithPatchWithNoSymbols := filepath.Join("build", binaryName+"_stripped") |
| // These will be created next |
| binaryWithNoPatchWithSymbols := filepath.Join("build_nopatch", binaryName) |
| binaryWithNoPatchWithNoSymbols := filepath.Join("build_nopatch", binaryName+"_stripped") |
| err := td.Do(ctx, td.Props("Create stripped version of no_patch binary"), func(ctx context.Context) error { |
| runCmd := &exec.Command{ |
| Name: "cp", |
| Args: []string{binaryWithNoPatchWithSymbols, binaryWithNoPatchWithNoSymbols}, |
| InheritEnv: true, |
| LogStdout: true, |
| LogStderr: true, |
| } |
| _, err := exec.RunCommand(ctx, runCmd) |
| if err != nil { |
| return skerr.Wrap(err) |
| } |
| runCmd = &exec.Command{ |
| Name: stripPath, |
| Args: []string{binaryWithNoPatchWithNoSymbols}, |
| InheritEnv: true, |
| LogStdout: true, |
| LogStderr: true, |
| } |
| _, err = exec.RunCommand(ctx, runCmd) |
| if err != nil { |
| return skerr.Wrap(err) |
| } |
| runCmd = &exec.Command{ |
| Name: "ls", |
| Args: []string{"-al", "build_nopatch"}, |
| InheritEnv: true, |
| LogStdout: true, |
| LogStderr: true, |
| } |
| _, err = exec.RunCommand(ctx, runCmd) |
| return err |
| }) |
| if err != nil { |
| return "", nil, skerr.Wrap(err) |
| } |
| |
| runCmd := &exec.Command{ |
| Name: bloatyPath, |
| Args: []string{ |
| binaryWithPatchWithNoSymbols, |
| "--debug-file=" + binaryWithPatchWithSymbols, |
| "-d", "symbols", "-n", "0", "-s", "file", |
| "--", |
| binaryWithNoPatchWithNoSymbols, |
| "--debug-file=" + binaryWithNoPatchWithSymbols, |
| }, |
| InheritEnv: true, |
| LogStdout: true, |
| LogStderr: true, |
| } |
| |
| var bloatyOutput string |
| if err := td.Do(ctx, td.Props(fmt.Sprintf("Run Bloaty diff against binary %q", binaryName)), func(ctx context.Context) error { |
| bloatyOutput, err = exec.RunCommand(ctx, runCmd) |
| return err |
| }); err != nil { |
| return "", nil, skerr.Wrap(err) |
| } |
| |
| return bloatyOutput, runCmd.Args, nil |
| } |
| |
| // computeTargetGCSDirectory computes the target GCS directory where to upload the Bloaty output file |
| // and JSON metadata file. |
| func computeTargetGCSDirectory(ctx context.Context, repoState types.RepoState, taskID, compileTaskName string) string { |
| timePrefix := now.Now(ctx).UTC().Format("2006/01/02/15") // YYYY/MM/DD/HH. |
| if repoState.IsTryJob() { |
| // Example: 2022/01/31/01/tryjob/12345/3/CkPp9ElAaEXyYWNHpXHU/Build-Debian10-Clang-x86_64-Release |
| return fmt.Sprintf("%s/tryjob/%s/%s/%s/%s", timePrefix, repoState.Patch.Issue, repoState.Patch.Patchset, taskID, compileTaskName) |
| } else { |
| // Example: 2022/01/31/01/033ccea12c0949d0f712471bfcb4ed6daf69aaff/Build-Debian10-Clang-x86_64-Release |
| return fmt.Sprintf("%s/%s/%s", timePrefix, repoState.Revision, compileTaskName) |
| } |
| } |
| |
| // uploadPerfData gets the file size of the stripped binary (i.e. without debug symbols), formats |
| // the JSON how Perf expects it, and uploads it to Perf's GCS bucket. |
| func uploadPerfData(ctx context.Context, perfGCS gcs.GCSClient, gcsPathPrefix, binaryName, taskID string, perfData format.Format) error { |
| // Use the taskID to guarantee unique file ids |
| gcsPath := "nano-json-v1/" + gcsPathPrefix + "/codesize_" + taskID + ".json" |
| |
| err := td.Do(ctx, td.Props("Upload total stripped binary size to Perf"), func(ctx context.Context) error { |
| s, err := os_steps.Stat(ctx, filepath.Join("build", binaryName+"_stripped")) |
| if err != nil { |
| return err |
| } |
| totalBytes := s.Size() |
| |
| s, err = os_steps.Stat(ctx, filepath.Join("build_nopatch", binaryName+"_stripped")) |
| if err != nil { |
| return err |
| } |
| beforeBytes := s.Size() |
| |
| perfData.Results = []format.Result{{ |
| Key: map[string]string{"measurement": "stripped_binary_bytes"}, |
| Measurement: float32(totalBytes), |
| }, { |
| Key: map[string]string{"measurement": "stripped_diff_bytes"}, |
| Measurement: float32(totalBytes - beforeBytes), |
| }} |
| |
| perfJSON, err := json.MarshalIndent(perfData, "", " ") |
| if err != nil { |
| return err |
| } |
| return uploadFileToGCS(ctx, perfGCS, gcsPath, perfJSON) |
| }) |
| if err != nil { |
| return skerr.Wrap(err) |
| } |
| return nil |
| } |
| |
| // uploadFileToGCS uploads a file to the given GCS bucket. |
| func uploadFileToGCS(ctx context.Context, gcsClient gcs.GCSClient, path string, contents []byte) error { |
| gcsURL := fmt.Sprintf("gs://%s/%s", gcsClient.Bucket(), path) |
| return td.Do(ctx, td.Props(fmt.Sprintf("Upload %s", gcsURL)), func(ctx context.Context) error { |
| if err := gcsClient.SetFileContents(ctx, path, gcs.FILE_WRITE_OPTS_TEXT, contents); err != nil { |
| return skerr.Wrapf(err, "Could not write task to %s", gcsURL) |
| } |
| return nil |
| }) |
| } |