[autoroll] Limit to 3 retries of the same roll
Additionally, delay sending the notification that a roll failed and
updating metrics until the roll is closed (either landed or abandoned
after 3 attempts).
Bug: skia:10696
Change-Id: I53d70c02177c37a1717019f52f8bdcff0eee8459
Reviewed-on: https://skia-review.googlesource.com/c/buildbot/+/436056
Commit-Queue: Eric Boren <borenet@google.com>
Reviewed-by: Ravi Mistry <rmistry@google.com>
diff --git a/autoroll/go/codereview/roll.go b/autoroll/go/codereview/roll.go
index bd386bd..f561a59 100644
--- a/autoroll/go/codereview/roll.go
+++ b/autoroll/go/codereview/roll.go
@@ -273,6 +273,7 @@
return skerr.Wrap(err)
}
r.issue.IsDryRun = false
+ r.issue.Attempt++
return nil
})
}
@@ -287,13 +288,14 @@
return skerr.Wrap(err)
}
r.issue.IsDryRun = true
+ r.issue.Attempt++
return nil
})
}
// See documentation for state_machine.RollCLImpl interface.
func (r *gerritRoll) Update(ctx context.Context) error {
- alreadyFinished := r.IsFinished()
+ alreadyClosed := r.IsClosed()
ci, err := r.retrieveRoll(ctx)
if err != nil {
return err
@@ -305,12 +307,17 @@
if err := r.recent.Update(ctx, r.issue); err != nil {
return err
}
- if r.IsFinished() && !alreadyFinished && r.finishedCallback != nil {
+ if r.IsClosed() && !alreadyClosed && r.finishedCallback != nil {
return r.finishedCallback(ctx, r)
}
return nil
}
+// See documentation for state_machine.RollClImpl interface.
+func (r *gerritRoll) Attempt() int {
+ return r.issue.Attempt
+}
+
// See documentation for state_machine.RollCLImpl interface.
func (r *gerritRoll) IssueID() string {
return fmt.Sprintf("%d", r.issue.Issue)
@@ -488,7 +495,7 @@
// See documentation for state_machine.RollCLImpl interface.
func (r *githubRoll) Update(ctx context.Context) error {
- alreadyFinished := r.IsFinished()
+ alreadyClosed := r.IsClosed()
pullRequest, err := r.retrieveRoll(ctx)
if err != nil {
return err
@@ -500,7 +507,7 @@
if err := r.recent.Update(ctx, r.issue); err != nil {
return err
}
- if r.IsFinished() && !alreadyFinished && r.finishedCallback != nil {
+ if r.IsClosed() && !alreadyClosed && r.finishedCallback != nil {
return r.finishedCallback(ctx, r)
}
return nil
@@ -573,6 +580,7 @@
return err
}
r.issue.IsDryRun = false
+ r.issue.Attempt++
return nil
})
}
@@ -584,10 +592,16 @@
return err
}
r.issue.IsDryRun = true
+ r.issue.Attempt++
return nil
})
}
+// See documentation for state_machine.RollClImpl interface.
+func (r *githubRoll) Attempt() int {
+ return r.issue.Attempt
+}
+
// See documentation for state_machine.RollCLImpl interface.
func (r *githubRoll) IssueID() string {
return fmt.Sprintf("%d", r.issue.Issue)
diff --git a/autoroll/go/state_machine/state_machine.go b/autoroll/go/state_machine/state_machine.go
index dfd9086..46f2cbf 100644
--- a/autoroll/go/state_machine/state_machine.go
+++ b/autoroll/go/state_machine/state_machine.go
@@ -65,6 +65,9 @@
// arbitrary limit just to keep us from performing an unbounded number
// of transitions at a time.
MAX_NOOP_TRANSITIONS = 10
+
+ // maxRollCQAttempts is the maximum number of CQ attempts per roll.
+ maxRollCQAttempts = 3
)
// Interface for interacting with a single autoroll CL.
@@ -72,6 +75,10 @@
// Add a comment to the CL.
AddComment(context.Context, string) error
+ // Attempt returns the number of the current attempt for this roll, starting
+ // at zero for the first attempt.
+ Attempt() int
+
// Close the CL. The first string argument is the result of the roll,
// and the second is the message to add to the CL on closing.
Close(context.Context, string, string) error
@@ -598,15 +605,17 @@
if currentRoll.IsClosed() {
return S_NORMAL_IDLE, nil
}
- throttle := s.a.FailureThrottle()
- if err := throttle.Inc(ctx); err != nil {
- return "", err
- }
- if s.a.GetNextRollRev().Id == currentRoll.RollingTo().Id {
- // Rather than upload the same CL again, we'll try
- // running the CQ again after a period of throttling.
- if throttle.IsThrottled() {
- return S_NORMAL_FAILURE_THROTTLED, nil
+ if currentRoll.Attempt() < maxRollCQAttempts-1 {
+ throttle := s.a.FailureThrottle()
+ if err := throttle.Inc(ctx); err != nil {
+ return "", err
+ }
+ if s.a.GetNextRollRev().Id == currentRoll.RollingTo().Id {
+ // Rather than upload the same CL again, we'll try
+ // running the CQ again after a period of throttling.
+ if throttle.IsThrottled() {
+ return S_NORMAL_FAILURE_THROTTLED, nil
+ }
}
}
return S_NORMAL_IDLE, nil
@@ -727,13 +736,15 @@
if currentRoll == nil {
return S_CURRENT_ROLL_MISSING, nil
}
- if err := s.a.FailureThrottle().Inc(ctx); err != nil {
- return "", err
- }
- if s.a.GetNextRollRev().Id == currentRoll.RollingTo().Id {
- // Rather than upload the same CL again, we'll try
- // running the CQ again after a period of throttling.
- return S_DRY_RUN_FAILURE_THROTTLED, nil
+ if currentRoll.Attempt() < maxRollCQAttempts-1 {
+ if err := s.a.FailureThrottle().Inc(ctx); err != nil {
+ return "", err
+ }
+ if s.a.GetNextRollRev().Id == currentRoll.RollingTo().Id {
+ // Rather than upload the same CL again, we'll try
+ // running the CQ again after a period of throttling.
+ return S_DRY_RUN_FAILURE_THROTTLED, nil
+ }
}
return S_DRY_RUN_IDLE, nil
case S_DRY_RUN_FAILURE_THROTTLED:
diff --git a/autoroll/go/state_machine/state_machine_test.go b/autoroll/go/state_machine/state_machine_test.go
index 97640c8..1d42df3 100644
--- a/autoroll/go/state_machine/state_machine_test.go
+++ b/autoroll/go/state_machine/state_machine_test.go
@@ -23,6 +23,7 @@
// A completely mocked implementation of RollCLImpl.
type TestRollCLImpl struct {
t *testing.T
+ attempt int
closedStatus string
closedMsg string
isDryRun bool
@@ -47,6 +48,11 @@
}
// See documentation for RollCLImpl.
+func (r *TestRollCLImpl) Attempt() int {
+ return r.attempt
+}
+
+// See documentation for RollCLImpl.
func (r *TestRollCLImpl) Close(ctx context.Context, status, msg string) error {
r.closedStatus = status
r.closedMsg = msg
@@ -147,6 +153,7 @@
func (r *TestRollCLImpl) RetryDryRun(ctx context.Context) error {
r.isDryRun = true
r.dryRunResult = ""
+ r.attempt++
return nil
}
@@ -154,6 +161,7 @@
func (r *TestRollCLImpl) RetryCQ(ctx context.Context) error {
r.isDryRun = false
r.normalResult = ""
+ r.attempt++
return nil
}
@@ -852,3 +860,69 @@
require.NoError(t, sm.NextTransition(ctx))
}
}
+
+func TestAbandonAfterTooManyAttempts(t *testing.T) {
+ ctx, sm, r, gcsClient, cleanup := setup(t)
+ defer cleanup()
+
+ counterFile := "fail_counter"
+ failureThrottle, err := NewThrottler(ctx, gcsClient, counterFile, time.Hour, 1)
+ require.NoError(t, err)
+ r.failureThrottle = failureThrottle
+
+ r.SetNextRollRev("HEAD+1")
+
+ failCL := func(expectedState string) {
+ checkNextState(t, sm, S_NORMAL_ACTIVE)
+ roll := r.GetActiveRoll().(*TestRollCLImpl)
+ roll.SetFailed()
+ checkNextState(t, sm, S_NORMAL_FAILURE)
+ checkNextState(t, sm, expectedState)
+ }
+
+ for i := 0; i < maxRollCQAttempts-1; i++ {
+ failCL(S_NORMAL_FAILURE_THROTTLED)
+
+ // Hack the timer to fake that the throttling has expired, then ensure
+ // that we end up at the expected state.
+ require.NoError(t, gcsClient.DeleteFile(ctx, counterFile))
+ failureThrottle, err = NewThrottler(ctx, gcsClient, counterFile, time.Minute, 1)
+ require.NoError(t, err)
+ r.failureThrottle = failureThrottle
+ }
+ failCL(S_NORMAL_IDLE)
+}
+
+func TestAbandonAfterTooManyAttemptsDryRun(t *testing.T) {
+ ctx, sm, r, gcsClient, cleanup := setup(t)
+ defer cleanup()
+
+ counterFile := "fail_counter"
+ failureThrottle, err := NewThrottler(ctx, gcsClient, counterFile, time.Hour, 1)
+ require.NoError(t, err)
+ r.failureThrottle = failureThrottle
+
+ r.SetMode(ctx, modes.ModeDryRun)
+ checkNextState(t, sm, S_DRY_RUN_IDLE)
+ r.SetNextRollRev("HEAD+1")
+
+ failCL := func(expectedState string) {
+ checkNextState(t, sm, S_DRY_RUN_ACTIVE)
+ roll := r.GetActiveRoll().(*TestRollCLImpl)
+ roll.SetDryRunFailed()
+ checkNextState(t, sm, S_DRY_RUN_FAILURE)
+ checkNextState(t, sm, expectedState)
+ }
+
+ for i := 0; i < maxRollCQAttempts-1; i++ {
+ failCL(S_DRY_RUN_FAILURE_THROTTLED)
+
+ // Hack the timer to fake that the throttling has expired, then ensure
+ // that we end up at the expected state.
+ require.NoError(t, gcsClient.DeleteFile(ctx, counterFile))
+ failureThrottle, err = NewThrottler(ctx, gcsClient, counterFile, time.Minute, 1)
+ require.NoError(t, err)
+ r.failureThrottle = failureThrottle
+ }
+ failCL(S_DRY_RUN_IDLE)
+}
diff --git a/go/autoroll/autoroll.go b/go/autoroll/autoroll.go
index b1b7a28..25c411e 100644
--- a/go/autoroll/autoroll.go
+++ b/go/autoroll/autoroll.go
@@ -75,6 +75,7 @@
// AutoRollIssue is a struct containing the information we care about for
// AutoRoll CLs.
type AutoRollIssue struct {
+ Attempt int `json:"attempts"`
Closed bool `json:"closed"`
Comments []*comment.Comment `json:"comments"`
Committed bool `json:"committed"`
@@ -139,6 +140,7 @@
}
}
return &AutoRollIssue{
+ Attempt: i.Attempt,
Closed: i.Closed,
Comments: commentsCpy,
Committed: i.Committed,
diff --git a/go/autoroll/autoroll_test.go b/go/autoroll/autoroll_test.go
index 93e1b6e..2809357 100644
--- a/go/autoroll/autoroll_test.go
+++ b/go/autoroll/autoroll_test.go
@@ -17,7 +17,8 @@
func TestAutoRollIssueCopy(t *testing.T) {
unittest.SmallTest(t)
roll := &AutoRollIssue{
- Closed: true,
+ Attempt: 2,
+ Closed: true,
Comments: []*comment.Comment{
{
Id: "123",