|  | // Package cq provides tools for interacting with the CQ tools. | 
|  | package cq | 
|  |  | 
|  | import ( | 
|  | "context" | 
|  | "fmt" | 
|  | "sync" | 
|  | "time" | 
|  |  | 
|  | "github.com/golang/protobuf/ptypes" | 
|  | buildbucketpb "go.chromium.org/luci/buildbucket/proto" | 
|  |  | 
|  | "go.skia.org/infra/go/common" | 
|  | "go.skia.org/infra/go/gerrit" | 
|  | "go.skia.org/infra/go/git" | 
|  | "go.skia.org/infra/go/git/git_common" | 
|  | "go.skia.org/infra/go/gitiles" | 
|  | "go.skia.org/infra/go/metrics2" | 
|  | "go.skia.org/infra/go/skerr" | 
|  | "go.skia.org/infra/go/sklog" | 
|  | "go.skia.org/infra/go/util" | 
|  | "go.skia.org/infra/task_scheduler/go/specs" | 
|  | ) | 
|  |  | 
|  | const ( | 
|  | MAIN_REF = git_common.RefsHeadsPrefix + git.MainBranch | 
|  |  | 
|  | // Constants for in-flight metrics. | 
|  | INFLIGHT_METRIC_NAME     = "in_flight" | 
|  | INFLIGHT_TRYBOT_DURATION = "trybot_duration" | 
|  | INFLIGHT_TRYBOT_NUM      = "trybot_num" | 
|  | INFLIGHT_WAITING_IN_CQ   = "waiting_in_cq" | 
|  |  | 
|  | // Constants for landed metrics. | 
|  | LANDED_METRIC_NAME     = "after_commit" | 
|  | LANDED_TRYBOT_DURATION = "trybot_duration" | 
|  | LANDED_TOTAL_DURATION  = "total_duration" | 
|  | ) | 
|  |  | 
|  | var ( | 
|  | // Slice of all known presubmit bot names. | 
|  | PRESUBMIT_BOTS = []string{"skia_presubmit-Trybot"} | 
|  |  | 
|  | // Mutext to control access to the slice of CQ trybots. | 
|  | cqTryBotsMutex sync.RWMutex | 
|  | ) | 
|  |  | 
|  | // NewClient creates a new client for interacting with CQ tools. | 
|  | func NewClient(gerritClient *gerrit.Gerrit, cqTryBotsFunc GetCQTryBotsFn, metricName string) (*Client, error) { | 
|  | cqTryBots, err := cqTryBotsFunc() | 
|  | if err != nil { | 
|  | return nil, err | 
|  | } | 
|  | return &Client{gerritClient, util.NewStringSet(cqTryBots), cqTryBotsFunc, metricName}, err | 
|  | } | 
|  |  | 
|  | // GetCQTryBotsFn is an interface for returing the CQ trybots of a project. | 
|  | type GetCQTryBotsFn func() ([]string, error) | 
|  |  | 
|  | type Client struct { | 
|  | gerritClient  *gerrit.Gerrit | 
|  | cqTryBots     util.StringSet | 
|  | cqTryBotsFunc GetCQTryBotsFn | 
|  | metricName    string | 
|  | } | 
|  |  | 
|  | // GetSkiaCQTryBots is a Skia implementation of GetCQTryBotsFn. | 
|  | func GetSkiaCQTryBots() ([]string, error) { | 
|  | cfg, err := GetCQJobsToConfig(gitiles.NewRepo(common.REPO_SKIA, nil), MAIN_REF) | 
|  | if err != nil { | 
|  | return nil, err | 
|  | } | 
|  | return GetCQTryBots(cfg) | 
|  | } | 
|  |  | 
|  | // GetSkiaInfraCQTryBots is a Skia Infra implementation of GetCQTryBotsFn. | 
|  | func GetSkiaInfraCQTryBots() ([]string, error) { | 
|  | cfg, err := GetCQJobsToConfig(gitiles.NewRepo(common.REPO_SKIA_INFRA, nil), MAIN_REF) | 
|  | if err != nil { | 
|  | return nil, err | 
|  | } | 
|  | return GetCQTryBots(cfg) | 
|  | } | 
|  |  | 
|  | // GetCQJobsToConfig returns the Config for the given repo. | 
|  | func GetCQJobsToConfig(repo *gitiles.Repo, ref string) (map[string]*specs.CommitQueueJobConfig, error) { | 
|  | contents, err := repo.ReadFileAtRef(context.Background(), specs.TASKS_CFG_FILE, ref) | 
|  | if err != nil { | 
|  | return nil, err | 
|  | } | 
|  | tasksCfg, err := specs.ParseTasksCfg(string(contents)) | 
|  | if err != nil { | 
|  | return nil, skerr.Wrapf(err, "Error when parsing tasks.json cfg") | 
|  | } | 
|  | return tasksCfg.CommitQueue, nil | 
|  | } | 
|  |  | 
|  | // GetCQTryBots is a convenience method for retrieving the list of CQ trybots | 
|  | // from a Config. | 
|  | func GetCQTryBots(cqJobsToConfig map[string]*specs.CommitQueueJobConfig) ([]string, error) { | 
|  | tryJobs := []string{} | 
|  | for tryJob := range cqJobsToConfig { | 
|  | if util.ContainsAny(tryJob, PRESUBMIT_BOTS) { | 
|  | // Exclude presubmit bots because they could fail or be delayed | 
|  | // due to factors such as owners approval and other project | 
|  | // specific checks. | 
|  | continue | 
|  | } | 
|  | tryJobs = append(tryJobs, tryJob) | 
|  | } | 
|  |  | 
|  | sklog.Infof("The list of CQ trybots is: %s", tryJobs) | 
|  | return tryJobs, nil | 
|  | } | 
|  |  | 
|  | // RefreshCQTryBots refreshes the slice of CQ trybots on the instance. Access | 
|  | // to the trybots is protected by a RWMutex. | 
|  | func (c *Client) RefreshCQTryBots() error { | 
|  | tryBots, err := c.cqTryBotsFunc() | 
|  | if err != nil { | 
|  | return err | 
|  | } | 
|  | cqTryBotsMutex.Lock() | 
|  | defer cqTryBotsMutex.Unlock() | 
|  | c.cqTryBots = util.NewStringSet(tryBots) | 
|  | return nil | 
|  | } | 
|  |  | 
|  | // ReportCQStats reports all relevant stats for the specified Gerrit change. | 
|  | // Note: Different stats are reported depending on whether the change has been | 
|  | // merged or not. | 
|  | // All created metrics will be registered in reportedMetrics. | 
|  | func (c *Client) ReportCQStats(ctx context.Context, change int64, reportedMetrics map[metrics2.Int64Metric]struct{}) error { | 
|  | changeInfo, err := c.gerritClient.GetIssueProperties(ctx, change) | 
|  | if err != nil { | 
|  | return err | 
|  | } | 
|  | patchsetIds := changeInfo.GetPatchsetIDs() | 
|  | latestPatchsetId := patchsetIds[len(patchsetIds)-1] | 
|  | if changeInfo.Committed { | 
|  | // TODO(rmistry): The last patchset in Gerrit does not contain trybot | 
|  | // information so we have to look at the one immediately before it. | 
|  | // This will be fixed with crbug.com/634944. | 
|  | latestPatchsetId = patchsetIds[len(patchsetIds)-2] | 
|  | } | 
|  |  | 
|  | builds, err := c.gerritClient.GetTrybotResults(ctx, change, latestPatchsetId) | 
|  | if err != nil { | 
|  | return err | 
|  | } | 
|  | // Consider only CQ bots. | 
|  | cqBuilds := []*buildbucketpb.Build{} | 
|  | for _, b := range builds { | 
|  | if c.isCQTryBot(b.Builder.Builder) { | 
|  | cqBuilds = append(cqBuilds, b) | 
|  | } | 
|  | } | 
|  | gerritURL := fmt.Sprintf("%s/c/%d/%d", gerrit.GerritSkiaURL, change, latestPatchsetId) | 
|  | if len(cqBuilds) == 0 { | 
|  | sklog.Infof("No trybot results were found for %s", gerritURL) | 
|  | return nil | 
|  | } | 
|  |  | 
|  | sklog.Infof("Starting processing %s. Merged status: %t", gerritURL, changeInfo.Committed) | 
|  |  | 
|  | if changeInfo.Committed { | 
|  | c.ReportCQStatsForLandedCL(cqBuilds, gerritURL, reportedMetrics) | 
|  | } else { | 
|  | c.ReportCQStatsForInFlightCL(cqBuilds, gerritURL, reportedMetrics) | 
|  | } | 
|  | return nil | 
|  | } | 
|  |  | 
|  | // ReportCQStatsForLandedCL reports the following metrics for the specified | 
|  | // change and patchsetID: | 
|  | // * The total time the change spent waiting for CQ trybots to complete. | 
|  | // * The time each CQ trybot took to complete. | 
|  | // All created metrics will be registered in reportedMetrics. | 
|  | func (c *Client) ReportCQStatsForLandedCL(cqBuilds []*buildbucketpb.Build, gerritURL string, reportedMetrics map[metrics2.Int64Metric]struct{}) { | 
|  | endTimeOfCQBots := time.Time{} | 
|  | maximumTrybotDuration := int64(0) | 
|  | for _, b := range cqBuilds { | 
|  | createdTime, err := ptypes.Timestamp(b.CreateTime) | 
|  | if err != nil { | 
|  | sklog.Errorf("Failed to convert timestamp for %d; skipping: %s", b.Id, err) | 
|  | continue | 
|  | } | 
|  | createdTime = createdTime.UTC() | 
|  | if b.EndTime == nil { | 
|  | sklog.Warningf("Skipping %s on %s. The correct completed time has not shown up in Buildbucket yet.", b.Builder.Builder, gerritURL) | 
|  | continue | 
|  | } | 
|  | completedTime, err := ptypes.Timestamp(b.EndTime) | 
|  | if err != nil { | 
|  | sklog.Errorf("Failed to convert timestamp for %d; skipping: %s", b.Id, err) | 
|  | continue | 
|  | } | 
|  | completedTime = completedTime.UTC() | 
|  | if endTimeOfCQBots.Before(completedTime) { | 
|  | endTimeOfCQBots = completedTime | 
|  | } | 
|  |  | 
|  | duration := int64(completedTime.Sub(createdTime).Seconds()) | 
|  | sklog.Infof("%s was created at %s by %s and completed at %s. Total duration: %d", b.Builder.Builder, createdTime, gerritURL, completedTime, duration) | 
|  | landedTrybotDurationMetric := c.getLandedTrybotDurationMetric(b.Builder.Builder, gerritURL) | 
|  | landedTrybotDurationMetric.Update(duration) | 
|  | reportedMetrics[landedTrybotDurationMetric] = struct{}{} | 
|  |  | 
|  | if duration > maximumTrybotDuration { | 
|  | maximumTrybotDuration = duration | 
|  | } | 
|  | } | 
|  |  | 
|  | sklog.Infof("Maximum trybot duration for %s: %d", gerritURL, maximumTrybotDuration) | 
|  | sklog.Infof("Furthest completion time for %s: %s", gerritURL, endTimeOfCQBots) | 
|  | landedTotalDurationMetric := metrics2.GetInt64Metric(fmt.Sprintf("%s_%s_%s", c.metricName, LANDED_METRIC_NAME, LANDED_TOTAL_DURATION), map[string]string{"gerritURL": gerritURL}) | 
|  | landedTotalDurationMetric.Update(maximumTrybotDuration) | 
|  | reportedMetrics[landedTotalDurationMetric] = struct{}{} | 
|  | } | 
|  |  | 
|  | // ReportCQStatsForInFlightCL reports the following metrics for the specified | 
|  | // change and patchsetID: | 
|  | // * How long CQ trybots have been running for. | 
|  | // * How many CQ trybots have been triggered. | 
|  | // All created metrics will be registered in reportedMetrics. | 
|  | func (c *Client) ReportCQStatsForInFlightCL(cqBuilds []*buildbucketpb.Build, gerritURL string, reportedMetrics map[metrics2.Int64Metric]struct{}) { | 
|  | totalTriggeredCQBots := int(0) | 
|  | currentTime := time.Now() | 
|  | for _, b := range cqBuilds { | 
|  | totalTriggeredCQBots++ | 
|  |  | 
|  | createdTime, err := ptypes.Timestamp(b.CreateTime) | 
|  | if err != nil { | 
|  | sklog.Errorf("Failed to convert timestamp for %d; skipping: %s", b.Id, err) | 
|  | continue | 
|  | } | 
|  | createdTime = createdTime.UTC() | 
|  | if b.EndTime != nil { | 
|  | if time.Hour*24 < time.Now().UTC().Sub(createdTime) { | 
|  | // The build was created more than a day ago. Do not include it | 
|  | // in totalTriggeredCQBots. See skbug.com/7340. | 
|  | // Creation time is used above instead of completion time because | 
|  | // that is what CQ does: | 
|  | // https://chrome-internal.googlesource.com/infra/infra_internal/+show/master/infra_internal/services/cq/verification/tryjob_utils.py#1271 | 
|  | totalTriggeredCQBots-- | 
|  | } | 
|  | // The build has completed so move on. | 
|  | continue | 
|  | } | 
|  |  | 
|  | duration := int64(currentTime.Sub(createdTime).Seconds()) | 
|  | inflightTrybotDurationMetric := c.getInflightTrybotDurationMetric(b.Builder.Builder, gerritURL) | 
|  | inflightTrybotDurationMetric.Update(duration) | 
|  | reportedMetrics[inflightTrybotDurationMetric] = struct{}{} | 
|  | } | 
|  |  | 
|  | cqTryBotsMutex.RLock() | 
|  | cqTryBotsMutex.RUnlock() | 
|  | trybotNumDurationMetric := metrics2.GetInt64Metric(fmt.Sprintf("%s_%s_%s", c.metricName, INFLIGHT_METRIC_NAME, INFLIGHT_TRYBOT_NUM), map[string]string{"gerritURL": gerritURL}) | 
|  | trybotNumDurationMetric.Update(int64(totalTriggeredCQBots)) | 
|  | reportedMetrics[trybotNumDurationMetric] = struct{}{} | 
|  | } | 
|  |  | 
|  | func (c *Client) getInflightTrybotDurationMetric(tryBot, gerritURL string) metrics2.Int64Metric { | 
|  | metricName := fmt.Sprintf("%s_%s_%s", c.metricName, INFLIGHT_METRIC_NAME, INFLIGHT_TRYBOT_DURATION) | 
|  | tags := map[string]string{ | 
|  | "trybot":    tryBot, | 
|  | "gerritURL": gerritURL, | 
|  | } | 
|  | return metrics2.GetInt64Metric(metricName, tags) | 
|  | } | 
|  |  | 
|  | func (c *Client) getLandedTrybotDurationMetric(tryBot, gerritURL string) metrics2.Int64Metric { | 
|  | metricName := fmt.Sprintf("%s_%s_%s", c.metricName, LANDED_METRIC_NAME, LANDED_TRYBOT_DURATION) | 
|  | tags := map[string]string{ | 
|  | "trybot":    tryBot, | 
|  | "gerritURL": gerritURL, | 
|  | } | 
|  | return metrics2.GetInt64Metric(metricName, tags) | 
|  | } | 
|  |  | 
|  | func (c *Client) isCQTryBot(builderName string) bool { | 
|  | cqTryBotsMutex.RLock() | 
|  | isCQTrybot := c.cqTryBots[builderName] | 
|  | cqTryBotsMutex.RUnlock() | 
|  | return isCQTrybot | 
|  | } |