blob: 79fcacda7f1a135898d16fdca7bd64a9096b58ea [file] [log] [blame]
package autoroller
import (
"context"
"fmt"
"path"
"sync"
"time"
"go.skia.org/infra/autoroll/go/autoroll_modes"
"go.skia.org/infra/autoroll/go/recent_rolls"
"go.skia.org/infra/autoroll/go/repo_manager"
"go.skia.org/infra/go/autoroll"
"go.skia.org/infra/go/gerrit"
"go.skia.org/infra/go/metrics2"
"go.skia.org/infra/go/sklog"
"go.skia.org/infra/go/util"
)
// Throttling parameters for uploading new rolls.
const (
	// ROLL_ATTEMPT_THROTTLE_TIME is the window handed to the auto-decrementing
	// attempt counter.
	ROLL_ATTEMPT_THROTTLE_TIME = 30 * time.Minute

	// ROLL_ATTEMPT_THROTTLE_NUM is the attempt count at which the roller stops
	// uploading new rolls and reports STATUS_THROTTLED.
	ROLL_ATTEMPT_THROTTLE_NUM = 3
)

// Human-readable status strings reported by the roller.
const (
	STATUS_DRY_RUN_FAILURE     = "dry run failed"
	STATUS_DRY_RUN_IN_PROGRESS = "dry run in progress"
	STATUS_DRY_RUN_SUCCESS     = "dry run succeeded"
	STATUS_ERROR               = "error"
	STATUS_IN_PROGRESS         = "in progress"
	STATUS_STOPPED             = "stopped"
	STATUS_THROTTLED           = "throttled"
	STATUS_UP_TO_DATE          = "up to date"
)
// AutoRoller is a struct used for managing DEPS rolls.
type AutoRoller struct {
	// attemptCounter tracks recent roll upload attempts; new rolls are not
	// uploaded once it reaches ROLL_ATTEMPT_THROTTLE_NUM.
	attemptCounter *util.AutoDecrementCounter
	// cqExtraTrybots is passed through to the repo manager when creating a roll.
	cqExtraTrybots string
	// emails is the list of addresses copied on rolls; guarded by emailMtx.
	emails []string
	// gerrit is the code review client used to query and modify roll CLs.
	gerrit *gerrit.Gerrit
	// emailMtx protects emails.
	emailMtx sync.RWMutex
	// NOTE(review): lastError is not read or written by any code visible in
	// this file; confirm whether it is still needed.
	lastError error
	// liveness is reset after a committed roll has been observed in the repo.
	liveness metrics2.Liveness
	// modeHistory records mode changes and provides the current mode.
	modeHistory *autoroll_modes.ModeHistory
	// modeMtx is held by SetMode for the duration of the mode change.
	modeMtx sync.Mutex
	// recent stores the current, last and recent rolls.
	recent *recent_rolls.RecentRolls
	// rm manages the checkout and uploads rolls.
	rm repo_manager.RepoManager
	// runningMtx is held for the duration of doAutoRollInner.
	runningMtx sync.Mutex
	// status caches the roll-up status served by GetStatus.
	status *AutoRollStatusCache
	// rollIntoAndroid selects the Android repo manager and the Android
	// dry-run label checks.
	rollIntoAndroid bool
}
// NewAutoRoller creates and returns a new AutoRoller.
//
// The repo manager is chosen from the flags: rollIntoAndroid selects the
// Android repo manager, otherwise useManifest selects the manifest repo
// manager, otherwise the DEPS repo manager is used. The recent-rolls and
// mode-history databases are opened inside workdir. One roll cycle is run
// before returning so that the status cache is populated.
//
// If any step fails, databases which were already opened are closed again
// before the error is returned.
func NewAutoRoller(workdir, parentRepo, parentBranch, childPath, childBranch, cqExtraTrybots string, emails []string, gerrit *gerrit.Gerrit, depot_tools string, rollIntoAndroid, useManifest bool, strategy repo_manager.NextRollStrategy) (*AutoRoller, error) {
	var err error
	var rm repo_manager.RepoManager
	switch {
	case rollIntoAndroid:
		rm, err = repo_manager.NewAndroidRepoManager(workdir, parentBranch, childPath, childBranch, gerrit, strategy, nil)
	case useManifest:
		rm, err = repo_manager.NewManifestRepoManager(workdir, parentRepo, parentBranch, childPath, childBranch, depot_tools, gerrit, strategy, nil)
	default:
		rm, err = repo_manager.NewDEPSRepoManager(workdir, parentRepo, parentBranch, childPath, childBranch, depot_tools, gerrit, strategy, nil)
	}
	if err != nil {
		return nil, err
	}

	recent, err := recent_rolls.NewRecentRolls(path.Join(workdir, "recent_rolls.db"))
	if err != nil {
		return nil, err
	}

	mh, err := autoroll_modes.NewModeHistory(path.Join(workdir, "autoroll_modes.db"))
	if err != nil {
		// Don't leak the recent-rolls DB which was opened above.
		util.LogErr(recent.Close())
		return nil, err
	}

	arb := &AutoRoller{
		attemptCounter:  util.NewAutoDecrementCounter(ROLL_ATTEMPT_THROTTLE_TIME),
		cqExtraTrybots:  cqExtraTrybots,
		emails:          emails,
		gerrit:          gerrit,
		liveness:        metrics2.NewLiveness("last-autoroll-landed", map[string]string{"child-path": childPath}),
		modeHistory:     mh,
		recent:          recent,
		rm:              rm,
		status:          &AutoRollStatusCache{},
		rollIntoAndroid: rollIntoAndroid,
	}

	// Cycle once to fill out the current status.
	if err := arb.doAutoRoll(); err != nil {
		// The caller never receives arb, so release its databases here.
		util.LogErr(arb.Close())
		return nil, err
	}
	return arb, nil
}
// Start initiates the AutoRoller's loop.
//
// It starts the repo manager with repoFrequency, then runs doAutoRoll via
// util.RepeatCtx with tickFrequency, resetting the
// "last-successful-autoroll-tick" liveness metric after every cycle that
// returns no error. When ctx is cancelled the AutoRoller is closed exactly
// once. Start itself returns immediately; all work happens in goroutines.
func (r *AutoRoller) Start(tickFrequency, repoFrequency time.Duration, ctx context.Context) {
	repo_manager.Start(r.rm, repoFrequency, ctx)
	lv := metrics2.NewLiveness("last-successful-autoroll-tick")
	go util.RepeatCtx(tickFrequency, ctx, func() {
		if err := r.doAutoRoll(); err != nil {
			sklog.Errorf("Failed to run autoroll: %s", err)
		} else {
			lv.Reset()
		}
	})
	go func() {
		// Block until cancellation rather than polling, then close once and
		// let the goroutine exit.
		<-ctx.Done()
		util.LogErr(r.Close())
	}()
}
// Close closes all sub-structs of the AutoRoller.
//
// Both the recent-rolls store and the mode history are always closed, even if
// the first close fails. If only one of them fails, its error is returned
// unchanged; if both fail, the returned error mentions both failures.
func (r *AutoRoller) Close() error {
	recentErr := r.recent.Close()
	modeErr := r.modeHistory.Close()
	if recentErr != nil && modeErr != nil {
		return fmt.Errorf("Failed to close recent rolls: %s; failed to close mode history: %s", recentErr, modeErr)
	}
	if recentErr != nil {
		return recentErr
	}
	if modeErr != nil {
		return modeErr
	}
	return nil
}
// AutoRollStatus is a struct which provides roll-up status information about
// the AutoRoll Bot.
type AutoRollStatus struct {
	// CurrentRoll is the currently-active roll, if any.
	CurrentRoll *autoroll.AutoRollIssue `json:"currentRoll"`
	// Error is the message of the error from the last roll cycle, or empty.
	Error string `json:"error"`
	// GerritUrl is the URL reported by the Gerrit client for issue 0.
	GerritUrl string `json:"gerritUrl"`
	// LastRoll is the last roll reported by the recent-rolls store, if any.
	LastRoll *autoroll.AutoRollIssue `json:"lastRoll"`
	// LastRollRev is the last-rolled revision reported by the repo manager.
	LastRollRev string `json:"lastRollRev"`
	// Mode is the current mode of the roller.
	Mode *autoroll_modes.ModeChange `json:"mode"`
	// Recent is the list of recent rolls.
	Recent []*autoroll.AutoRollIssue `json:"recent"`
	// Status is one of the STATUS_* constants.
	Status string `json:"status"`
	// ValidModes is a copy of autoroll_modes.VALID_MODES.
	ValidModes []string `json:"validModes"`
}
// AutoRollStatusCache is a struct used for caching roll-up status
// information about the AutoRoll Bot.
//
// All fields are guarded by mtx; Get and Set copy rolls and the mode on the
// way in and out, so callers never share pointers with the cache.
type AutoRollStatusCache struct {
	// currentRoll is the cached active roll, or nil.
	currentRoll *autoroll.AutoRollIssue
	// gerritUrl is the cached Gerrit URL.
	gerritUrl string
	// lastError is the cached error message; only returned by Get when
	// includeError is true.
	lastError string
	// lastRoll is the cached last roll, or nil.
	lastRoll *autoroll.AutoRollIssue
	// lastRollRev is the cached last-rolled revision.
	lastRollRev string
	// mode is the cached mode, or nil.
	mode *autoroll_modes.ModeChange
	// mtx protects every other field of this struct.
	mtx sync.RWMutex
	// recent is the cached list of recent rolls.
	recent []*autoroll.AutoRollIssue
	// status is the cached STATUS_* string.
	status string
}
// Get returns a snapshot of the cached status. Rolls and the mode are copied
// so the caller may modify the result freely. The Error field is only filled
// in when includeError is true.
func (c *AutoRollStatusCache) Get(includeError bool) *AutoRollStatus {
	c.mtx.RLock()
	defer c.mtx.RUnlock()

	rolls := make([]*autoroll.AutoRollIssue, len(c.recent))
	for i, roll := range c.recent {
		rolls[i] = roll.Copy()
	}

	modes := append(make([]string, 0, len(autoroll_modes.VALID_MODES)), autoroll_modes.VALID_MODES...)

	out := &AutoRollStatus{}
	out.GerritUrl = c.gerritUrl
	out.LastRollRev = c.lastRollRev
	out.Recent = rolls
	out.Status = c.status
	out.ValidModes = modes

	if cur := c.currentRoll; cur != nil {
		out.CurrentRoll = cur.Copy()
	}
	if last := c.lastRoll; last != nil {
		out.LastRoll = last.Copy()
	}
	if mode := c.mode; mode != nil {
		out.Mode = mode.Copy()
	}
	// An empty cached error leaves Error at its zero value either way.
	if includeError {
		out.Error = c.lastError
	}
	return out
}
// Set replaces the cached status information with the contents of s.
//
// Rolls and the mode are stored as copies so later changes to s do not affect
// the cache. The error message in s.Error is cached as well, so that
// Get(true) can report it. A nil CurrentRoll, LastRoll or Mode clears the
// corresponding cached value. Set currently always returns nil.
func (c *AutoRollStatusCache) Set(s *AutoRollStatus) error {
	c.mtx.Lock()
	defer c.mtx.Unlock()

	recent := make([]*autoroll.AutoRollIssue, 0, len(s.Recent))
	for _, r := range s.Recent {
		recent = append(recent, r.Copy())
	}

	c.currentRoll = nil
	if s.CurrentRoll != nil {
		c.currentRoll = s.CurrentRoll.Copy()
	}
	c.lastRoll = nil
	if s.LastRoll != nil {
		c.lastRoll = s.LastRoll.Copy()
	}
	// Guard against a nil mode, mirroring the nil check in Get.
	c.mode = nil
	if s.Mode != nil {
		c.mode = s.Mode.Copy()
	}

	c.gerritUrl = s.GerritUrl
	c.lastError = s.Error
	c.lastRollRev = s.LastRollRev
	c.recent = recent
	c.status = s.Status
	return nil
}
// GetStatus returns a snapshot of the bot's roll-up status taken from the
// status cache. The error message is only included when includeError is true.
func (r *AutoRoller) GetStatus(includeError bool) *AutoRollStatus {
	snapshot := r.status.Get(includeError)
	return snapshot
}
// SetMode records a mode change (mode m, made by user, with the given
// message) and then immediately runs one roll cycle, blocking until that
// cycle has finished. Concurrent SetMode calls are serialized.
func (r *AutoRoller) SetMode(m, user, message string) error {
	r.modeMtx.Lock()
	defer r.modeMtx.Unlock()

	addErr := r.modeHistory.Add(m, user, message)
	if addErr != nil {
		return addErr
	}
	return r.doAutoRoll()
}
// isMode reports whether the most recent entry in the mode history is the
// given mode.
func (r *AutoRoller) isMode(s string) bool {
	current := r.modeHistory.CurrentMode()
	return s == current.Mode
}
// GetEmails returns a copy of the list of email addresses which are copied on
// DEPS rolls. The result is never nil and may be modified by the caller.
func (r *AutoRoller) GetEmails() []string {
	r.emailMtx.RLock()
	defer r.emailMtx.RUnlock()

	out := make([]string, 0, len(r.emails))
	out = append(out, r.emails...)
	return out
}
// SetEmails replaces the list of email addresses which are copied on DEPS
// rolls. The slice is copied, so the caller keeps ownership of e.
func (r *AutoRoller) SetEmails(e []string) {
	r.emailMtx.Lock()
	defer r.emailMtx.Unlock()

	r.emails = append(make([]string, 0, len(e)), e...)
}
// closeIssue abandons the given issue in Gerrit with the given message, then
// marks it closed with the given result (clearing both commit-queue flags)
// and saves it to the recent-rolls store. The issue is only modified after
// the abandon has succeeded.
func (r *AutoRoller) closeIssue(issue *autoroll.AutoRollIssue, result, msg string) error {
	sklog.Infof("Closing issue %d (result %q) with message: %s", issue.Issue, result, msg)

	change, convErr := issue.ToGerritChangeInfo()
	if convErr != nil {
		return fmt.Errorf("Failed to convert issue to Gerrit ChangeInfo: %s", convErr)
	}

	abandonErr := r.gerrit.Abandon(change, msg)
	if abandonErr != nil {
		return abandonErr
	}

	issue.CommitQueueDryRun = false
	issue.CommitQueue = false
	issue.Closed = true
	issue.Result = result
	return r.recent.Update(issue)
}
// addIssueComment posts msg as a comment on the given issue, then re-fetches
// the issue from the code review server and saves the refreshed copy to the
// recent-rolls store.
func (r *AutoRoller) addIssueComment(issue *autoroll.AutoRollIssue, msg string) error {
	sklog.Infof("Adding comment to issue: %q", msg)

	change, convErr := issue.ToGerritChangeInfo()
	if convErr != nil {
		return fmt.Errorf("Failed to convert issue to Gerrit ChangeInfo: %s", convErr)
	}

	commentErr := r.gerrit.AddComment(change, msg)
	if commentErr != nil {
		return commentErr
	}

	refreshed, fetchErr := r.retrieveRoll(issue.Issue)
	if fetchErr != nil {
		return fetchErr
	}
	return r.recent.Update(refreshed)
}
// setDryRun sends the issue to the commit queue, as a dry run when dryRun is
// true and as a normal run otherwise. Afterwards the issue is re-fetched from
// the code review server and the refreshed copy is saved to the recent-rolls
// store.
func (r *AutoRoller) setDryRun(issue *autoroll.AutoRollIssue, dryRun bool) error {
	change, convErr := issue.ToGerritChangeInfo()
	if convErr != nil {
		return fmt.Errorf("Failed to convert issue to Gerrit ChangeInfo: %s", convErr)
	}

	var sendErr error
	if dryRun {
		sendErr = r.rm.SendToGerritDryRun(change, "")
	} else {
		sendErr = r.rm.SendToGerritCQ(change, "")
	}
	if sendErr != nil {
		return sendErr
	}

	refreshed, fetchErr := r.retrieveRoll(issue.Issue)
	if fetchErr != nil {
		return fetchErr
	}
	return r.recent.Update(refreshed)
}
// updateCurrentRoll refreshes the stored information about the current DEPS
// roll from the code review server. It does nothing if there is no current
// roll.
//
// If the refreshed roll has been committed, this blocks, updating the repo
// every 10 seconds, until the repo manager reports that the target revision
// has been rolled past; it then resets the landed-roll liveness metric.
func (r *AutoRoller) updateCurrentRoll() error {
	active := r.recent.CurrentRoll()
	if active == nil {
		return nil
	}
	storedResult := active.Result

	fresh, err := r.retrieveRoll(active.Issue)
	if err != nil {
		return err
	}

	// Dry run results are tracked only in our own store, so carry the stored
	// result over while the roll is still open.
	if util.In(storedResult, autoroll.DRY_RUN_RESULTS) && !fresh.Closed {
		fresh.Result = storedResult
	}

	if !fresh.Committed {
		return r.recent.Update(fresh)
	}

	// The roll landed; the commit can take a while to show up in the repo, so
	// keep syncing until it does.
	sklog.Infof("Roll succeeded (%d); syncing the repo until it lands.", active.Issue)
	for landed := false; !landed; {
		sklog.Info("Syncing...")
		sklog.Infof("Looking for %s", active.RollingTo)
		if err := r.rm.Update(); err != nil {
			return err
		}
		landed, err = r.rm.RolledPast(active.RollingTo)
		if err != nil {
			return err
		}
		if !landed {
			time.Sleep(10 * time.Second)
		}
	}
	r.liveness.Reset()
	return r.recent.Update(fresh)
}
// retrieveRoll fetches the given issue from the code review server, converts
// it into an AutoRollIssue and attaches its try results. Each failing step is
// reported with an error naming that step.
func (r *AutoRoller) retrieveRoll(issueNum int64) (*autoroll.AutoRollIssue, error) {
	change, err := r.gerrit.GetIssueProperties(issueNum)
	if err != nil {
		return nil, fmt.Errorf("Failed to get issue properties: %s", err)
	}

	roll, err := autoroll.FromGerritChangeInfo(change, r.rm.FullChildHash, r.rollIntoAndroid)
	if err != nil {
		return nil, fmt.Errorf("Failed to convert issue format: %s", err)
	}

	results, err := autoroll.GetTryResultsFromGerrit(r.gerrit, roll)
	if err != nil {
		return nil, fmt.Errorf("Failed to retrieve try results: %s", err)
	}

	roll.TryResults = results
	return roll, nil
}
// doAutoRoll is the primary method of the AutoRoll Bot. It runs one roll
// cycle via doAutoRollInner and then publishes the resulting status, together
// with the current roll, mode and recent rolls, to the status cache so that
// users may read it. The error from the cycle, if any, is returned after the
// status has been stored.
func (r *AutoRoller) doAutoRoll() error {
	status, cycleErr := r.doAutoRollInner()

	var errMsg string
	if cycleErr != nil {
		errMsg = cycleErr.Error()
	}

	// Update status information.
	snapshot := &AutoRollStatus{
		GerritUrl:   r.gerrit.Url(0),
		CurrentRoll: r.recent.CurrentRoll(),
		Error:       errMsg,
		LastRoll:    r.recent.LastRoll(),
		LastRollRev: r.rm.LastRollRev(),
		Mode:        r.modeHistory.CurrentMode(),
		Recent:      r.recent.GetRecentRolls(),
		Status:      status,
	}
	if setErr := r.status.Set(snapshot); setErr != nil {
		return setErr
	}
	return cycleErr
}
// makeRollResult determines the result to record for a roll which is about to
// be closed. A roll whose result is not a dry run result is recorded as a
// failure. A dry run which is still in progress is recorded as a dry run
// failure. Any other dry run result is kept as it is.
func (r *AutoRoller) makeRollResult(roll *autoroll.AutoRollIssue) string {
	switch {
	case !util.In(roll.Result, autoroll.DRY_RUN_RESULTS):
		return autoroll.ROLL_RESULT_FAILURE
	case roll.Result == autoroll.ROLL_RESULT_DRY_RUN_IN_PROGRESS:
		return autoroll.ROLL_RESULT_DRY_RUN_FAILURE
	default:
		return roll.Result
	}
}
// isDryRunDone reports whether the dry run for the given roll has finished.
//
// For Android rolls the issue is re-fetched from Gerrit and the dry run
// counts as done once any vote on the presubmit-verified label is non-zero.
// For all other rolls the stored dry-run flag of the roll is used.
func (r *AutoRoller) isDryRunDone(roll *autoroll.AutoRollIssue) (bool, error) {
	if !r.rollIntoAndroid {
		// The CQ removes the CQ+1 label when the dry run finishes, regardless
		// of success or failure. Since we uploaded with the dry run label
		// set, the roll is in progress while the label is still set and done
		// otherwise.
		return !roll.CommitQueueDryRun, nil
	}

	change, err := r.gerrit.GetIssueProperties(roll.Issue)
	if err != nil {
		return false, err
	}
	if verified, present := change.Labels[gerrit.PRESUBMIT_VERIFIED_LABEL]; present {
		for _, vote := range verified.All {
			if vote.Value != 0 {
				return true, nil
			}
		}
	}
	return false, nil
}
// isDryRunSuccessful reports whether a finished dry run succeeded.
//
// For Android rolls the issue is re-fetched from Gerrit and the dry run is
// treated as failed if any vote on the presubmit-verified label is -1. For
// all other rolls the result is whether all of the roll's trybots succeeded.
//
// NOTE(review): unlike isDryRunDone, the label lookup here is not guarded by
// a presence check; confirm the label is always present when this is called.
func (r *AutoRoller) isDryRunSuccessful(roll *autoroll.AutoRollIssue) (bool, error) {
	if !r.rollIntoAndroid {
		return roll.AllTrybotsSucceeded(), nil
	}

	change, err := r.gerrit.GetIssueProperties(roll.Issue)
	if err != nil {
		return false, err
	}
	for _, vote := range change.Labels[gerrit.PRESUBMIT_VERIFIED_LABEL].All {
		if vote.Value == -1 {
			return false, nil
		}
	}
	return true, nil
}
// doAutoRollInner does the actual work of the AutoRoll. It returns one of the
// STATUS_* constants describing the state of the roller after this cycle,
// plus any error which occurred (in which case the status is STATUS_ERROR).
//
// One cycle consists of:
//  1. Refreshing the current roll from the code review server.
//  2. If a roll is active, deciding based on the mode (dry run, stopped or
//     running) whether to keep it, switch it between dry run and normal CQ,
//     or close it.
//  3. If no roll remains active, the roller is not stopped, the repo is not
//     up to date and we are not throttled, uploading a new roll.
//
// Only one cycle runs at a time; runningMtx is held for the whole call.
func (r *AutoRoller) doAutoRollInner() (string, error) {
	r.runningMtx.Lock()
	defer r.runningMtx.Unlock()

	// Get updated info about the current roll.
	if err := r.updateCurrentRoll(); err != nil {
		return STATUS_ERROR, err
	}

	// There's a currently-active roll. Determine whether or not it's still good.
	// If so, leave it open and exit. If not, close it so that we can open another.
	currentRoll := r.recent.CurrentRoll()
	if currentRoll != nil {
		sklog.Infof("Found current roll: %s", r.issueUrl(currentRoll.Issue))
		if r.isMode(autoroll_modes.MODE_DRY_RUN) {
			// If we have a normal (non-dry-run) roll running,
			// switch it to a dry run.
			if currentRoll.CommitQueue {
				sklog.Infof("Setting dry-run bit on %s", r.gerrit.Url(currentRoll.Issue))
				if err := r.setDryRun(currentRoll, true); err != nil {
					return STATUS_ERROR, err
				}
				return STATUS_DRY_RUN_IN_PROGRESS, nil
			}

			// If the CQ has finished, determine if it was a success
			// or failure.
			rollDone, err := r.isDryRunDone(currentRoll)
			if err != nil {
				return STATUS_ERROR, err
			}
			if rollDone {
				result := autoroll.ROLL_RESULT_DRY_RUN_FAILURE
				status := STATUS_DRY_RUN_FAILURE
				rollSuccessful, err := r.isDryRunSuccessful(currentRoll)
				if err != nil {
					return STATUS_ERROR, err
				}
				if rollSuccessful {
					result = autoroll.ROLL_RESULT_DRY_RUN_SUCCESS
					status = STATUS_DRY_RUN_SUCCESS
				}
				sklog.Infof("Dry run is finished: %v", currentRoll)
				if currentRoll.RollingTo != r.rm.NextRollRev() {
					// The dry run is stale; close it and fall through to
					// upload a new one below.
					if err := r.closeIssue(currentRoll, result, fmt.Sprintf("Repo has passed %s; will open a new dry run.", currentRoll.RollingTo)); err != nil {
						return STATUS_ERROR, err
					}
				} else if currentRoll.Result != result {
					// The dry run just finished. Set its result.
					if result == autoroll.ROLL_RESULT_DRY_RUN_FAILURE {
						if err := r.closeIssue(currentRoll, result, "Dry run failed. Closing, will open another."); err != nil {
							return STATUS_ERROR, err
						}
					} else {
						if err := r.addIssueComment(currentRoll, "Dry run finished successfully; leaving open in case we want to land"); err != nil {
							return STATUS_ERROR, err
						}
						currentRoll.Result = result
						if err := r.recent.Update(currentRoll); err != nil {
							return STATUS_ERROR, err
						}
						return status, nil
					}
				} else {
					// The dry run is finished but still good. Leave it open.
					sklog.Infof("Dry run is finished and still good.")
					return status, nil
				}
			} else {
				sklog.Infof("Dry run still in progress.")
				return STATUS_DRY_RUN_IN_PROGRESS, nil
			}
		} else {
			if currentRoll.CommitQueueDryRun {
				sklog.Infof("Unsetting dry run bit on %s", r.gerrit.Url(currentRoll.Issue))
				if err := r.setDryRun(currentRoll, false); err != nil {
					return STATUS_ERROR, err
				}
			}
			if r.isMode(autoroll_modes.MODE_STOPPED) {
				// If we're stopped, close the issue.
				// Respect the previous result of the roll.
				if err := r.closeIssue(currentRoll, r.makeRollResult(currentRoll), "AutoRoller is stopped; closing the active roll."); err != nil {
					return STATUS_ERROR, err
				}
			} else if !currentRoll.CommitQueue {
				// If the CQ failed, close the issue.
				// Special case: if the current roll was a dry run which succeeded, land it.
				if currentRoll.Result == autoroll.ROLL_RESULT_DRY_RUN_SUCCESS {
					sklog.Infof("Dry run succeeded. Attempting to land.")
					if err := r.setDryRun(currentRoll, false); err != nil {
						// Propagate the failure so that it is logged and
						// shown in the status instead of being dropped.
						return STATUS_ERROR, err
					}
					return STATUS_IN_PROGRESS, nil
				} else {
					if err := r.closeIssue(currentRoll, autoroll.ROLL_RESULT_FAILURE, "Commit queue failed; closing this roll."); err != nil {
						return STATUS_ERROR, err
					}
				}
			} else if time.Since(currentRoll.Modified) > 24*time.Hour {
				// If the roll has been open too long, close the issue.
				if err := r.closeIssue(currentRoll, autoroll.ROLL_RESULT_FAILURE, "Roll has been open for over 24 hours; closing."); err != nil {
					return STATUS_ERROR, err
				}
			} else {
				rolledPast, err := r.rm.RolledPast(currentRoll.RollingTo)
				if err != nil {
					return STATUS_ERROR, err
				}
				if rolledPast {
					// If we've already rolled past the target revision, close the issue
					if err := r.closeIssue(currentRoll, autoroll.ROLL_RESULT_FAILURE, fmt.Sprintf("Already rolled past %s; closing this roll.", currentRoll.RollingTo)); err != nil {
						return STATUS_ERROR, err
					}
				} else {
					// Current roll is still good.
					sklog.Infof("Roll is still active (%d): %s", currentRoll.Issue, currentRoll.Subject)
					return STATUS_IN_PROGRESS, nil
				}
			}
		}
	}

	// If we're stopped, exit.
	if r.isMode(autoroll_modes.MODE_STOPPED) {
		sklog.Infof("Roller is stopped; not opening new rolls.")
		return STATUS_STOPPED, nil
	}

	// If we're up-to-date, exit.
	lastRollRev := r.rm.LastRollRev()
	nextRollRev := r.rm.NextRollRev()
	if lastRollRev == nextRollRev {
		sklog.Infof("Repo is up-to-date.")
		return STATUS_UP_TO_DATE, nil
	}

	// Create a new roll, unless we have made too many attempts recently.
	if r.attemptCounter.Get() >= ROLL_ATTEMPT_THROTTLE_NUM {
		return STATUS_THROTTLED, nil
	}
	r.attemptCounter.Inc()
	dryRun := r.isMode(autoroll_modes.MODE_DRY_RUN)
	uploadedNum, err := r.rm.CreateNewRoll(lastRollRev, nextRollRev, r.GetEmails(), r.cqExtraTrybots, dryRun)
	if err != nil {
		return STATUS_ERROR, fmt.Errorf("Failed to upload a new roll: %s", err)
	}
	uploaded, err := r.retrieveRoll(uploadedNum)
	if err != nil {
		return STATUS_ERROR, fmt.Errorf("Failed to retrieve uploaded roll: %s", err)
	}
	if err := r.recent.Add(uploaded); err != nil {
		return STATUS_ERROR, fmt.Errorf("Failed to insert uploaded roll into database: %s", err)
	}

	// Report the status matching the way the roll was actually uploaded.
	if dryRun {
		return STATUS_DRY_RUN_IN_PROGRESS, nil
	}
	return STATUS_IN_PROGRESS, nil
}
// issueUrl returns the URL which the Gerrit client reports for the given
// issue number.
func (r *AutoRoller) issueUrl(num int64) string {
	url := r.gerrit.Url(num)
	return url
}
// User returns the user name reported by the repo manager.
func (r *AutoRoller) User() string {
	user := r.rm.User()
	return user
}