[repograph] Add tracking of commit indexes, branch membership
Change-Id: I01917a10364655477e3d4d95a9e520d65079fe0a
Reviewed-on: https://skia-review.googlesource.com/c/buildbot/+/234901
Commit-Queue: Eric Boren <borenet@google.com>
Reviewed-by: Kevin Lubick <kjlubick@google.com>
diff --git a/go/git/repograph/graph.go b/go/git/repograph/graph.go
index 0238141..7d70ed5 100644
--- a/go/git/repograph/graph.go
+++ b/go/git/repograph/graph.go
@@ -14,7 +14,9 @@
"go.skia.org/infra/go/git"
"go.skia.org/infra/go/metrics2"
+ "go.skia.org/infra/go/skerr"
"go.skia.org/infra/go/sklog"
+ "go.skia.org/infra/go/util"
"go.skia.org/infra/go/vcsinfo"
)
@@ -26,12 +28,6 @@
type Commit struct {
*vcsinfo.LongCommit
parents []*Commit
-
- // HistoryLen is the number of commits in the longest line of history
- // reachable from this Commit. It is only used during Recurse as an
- // estimate of the number of commits which might be visited, to prevent
- // excessive resizing of the visited map.
- HistoryLen int
}
// Parents returns the parents of this commit.
@@ -52,8 +48,11 @@
// sklog.Info(c.Hash)
// return nil
// })
+//
+// If the passed-in function returns no errors other than ErrStopRecursing,
+// Recurse() will never return an error.
func (c *Commit) Recurse(f func(*Commit) error) error {
- return c.recurse(f, make(map[*Commit]bool, c.HistoryLen))
+ return c.recurse(f, make(map[*Commit]bool, c.Index+1))
}
// recurse is a helper function used by Recurse.
@@ -93,10 +92,28 @@
return nil
}
+// RecurseFirstParent is like Recurse, but it only follows the first parent of
+// each commit. Like Recurse, if the passed-in function returns no errors other
+// than ErrStopRecursing, RecurseFirstParent() will never return an error.
+func (c *Commit) RecurseFirstParent(f func(*Commit) error) error {
+ for {
+ if err := f(c); err == ErrStopRecursing {
+ return nil
+ } else if err != nil {
+ return err
+ }
+ if len(c.parents) == 0 {
+ return nil
+ } else {
+ c = c.parents[0]
+ }
+ }
+}
+
// AllCommits returns the hashes of all commits reachable from this Commit, in
// reverse topological order.
func (c *Commit) AllCommits() ([]string, error) {
- commits := make(map[*Commit]bool, c.HistoryLen)
+ commits := make(map[*Commit]bool, c.Index+1)
if err := c.recurse(func(c *Commit) error {
return nil
}, commits); err != nil {
@@ -162,7 +179,8 @@
// Update the local view of the repo.
Update(context.Context) error
- // Return the details for the given commit.
+ // Return the details for the given commit. The Graph takes ownership
+ // of the returned LongCommit and may modify it.
Details(context.Context, string) (*vcsinfo.LongCommit, error)
// Return the branch heads, as of the last call to Update().
@@ -179,8 +197,9 @@
commits map[string]*Commit
graphMtx sync.RWMutex
- updateMtx sync.Mutex
- repoImpl RepoImpl
+ updateMtx sync.Mutex
+ repoImpl RepoImpl
+ trackBranches bool
}
// NewLocalGraph returns a Graph instance, creating a git.Repo from the repoUrl
@@ -216,11 +235,50 @@
return len(r.commits)
}
+// EnableBranchTracking enables tracking of commit membership in branches. A
+// commit is considered a member of a branch if and only if it is reachable by
+// tracing the *first parent* lineage backward from the branch head. Notably,
+// this will result in different branch membership than would be obtained by
+// `git branch --all --list --contains <hash` as is used elsewhere. This process
+// is somewhat expensive and is thus not enabled by default.
+func (r *Graph) EnableBranchTracking() {
+ r.updateMtx.Lock()
+ defer r.updateMtx.Unlock()
+ r.graphMtx.Lock()
+ defer r.graphMtx.Unlock()
+ r.trackBranches = true
+ r.updateBranchInfo()
+}
+
+// updateBranchInfo updates the membership of all commits in all branches. The
+// caller must hold r.updateMtx AND r.graphMtx.
+func (r *Graph) updateBranchInfo() {
+ // TODO(borenet): We have to obtain this information from scratch when
+ // EnableBranchTracking() is called, but we should be able to update it
+ // incrementally in updateFrom() in the common case. It seemed that
+ // there were enough edge cases that updateFrom() would become
+ // significantly more complicated, so I did not attempt that.
+ membership := make(map[*Commit]util.StringSet, len(r.commits))
+ for _, branch := range r.branches {
+ _ = r.commits[branch.Head].RecurseFirstParent(func(c *Commit) error {
+ m := membership[c]
+ if m == nil {
+ m = util.NewStringSet()
+ membership[c] = m
+ }
+ m[branch.Name] = true
+ return nil
+ })
+ }
+ for _, c := range r.commits {
+ c.Branches = membership[c]
+ }
+}
+
// addCommit adds the given commit to the Graph. Requires that the commit's
// parents are already in the Graph. Assumes that the caller holds r.graphMtx.
func (r *Graph) addCommit(lc *vcsinfo.LongCommit) error {
defer metrics2.FuncTimer().Stop()
- maxParentHistoryLen := 0
var parents []*Commit
if len(lc.Parents) > 0 {
for _, h := range lc.Parents {
@@ -232,8 +290,8 @@
return fmt.Errorf("repograph.Graph: Could not find parent commit %q", h)
}
parents = append(parents, p)
- if p.HistoryLen > maxParentHistoryLen {
- maxParentHistoryLen = p.HistoryLen
+ if lc.Index == 0 {
+ lc.Index = p.Index + 1
}
}
}
@@ -241,7 +299,6 @@
c := &Commit{
LongCommit: lc,
parents: parents,
- HistoryLen: maxParentHistoryLen + 1,
}
r.commits[c.Hash] = c
return nil
@@ -417,6 +474,9 @@
defer r.graphMtx.Unlock()
r.branches = newGraph.branches
r.commits = newGraph.commits
+ if r.trackBranches {
+ r.updateBranchInfo()
+ }
return added, removed, nil
}
@@ -701,6 +761,39 @@
return rv
}
+// LogLinear is equivalent to "git log --first-parent --ancestry-path from..to",
+// ie. it only returns commits which are on the direct path from A to B, and
+// only on the "main" branch. This is as opposed to "git log from..to" which
+// returns all commits which are ancestors of 'to' but not 'from'. The 'from'
+// commit may be the empty string, in which case all commits in the first-parent
+// line are returned.
+func (r *Graph) LogLinear(from, to string) ([]*Commit, error) {
+ fromCommit := r.Get(from)
+ if fromCommit == nil && from != "" {
+ return nil, skerr.Fmt("No such commit %q", from)
+ }
+ toCommit := r.Get(to)
+ if toCommit == nil {
+ return nil, skerr.Fmt("No such commit %q", to)
+ }
+ rv := []*Commit{}
+ found := false
+ _ = toCommit.RecurseFirstParent(func(c *Commit) error {
+ if c == fromCommit {
+ found = true
+ return ErrStopRecursing
+ }
+ rv = append(rv, c)
+ return nil
+ })
+ if !found && from != "" {
+ // If the "from" commit was not found, then there is no ancestry
+ // path from "from" to "to".
+ return nil, nil
+ }
+ return rv, nil
+}
+
// IsAncestor returns true iff A is an ancestor of B, where A and B are either
// commit hashes or branch names.
func (r *Graph) IsAncestor(a, b string) (bool, error) {
@@ -818,6 +911,9 @@
newGraph := newGraphs[repoUrl]
r.branches = newGraph.branches
r.commits = newGraph.commits
+ if r.trackBranches {
+ r.updateBranchInfo()
+ }
}
return added, removed, nil
}
diff --git a/go/git/repograph/local_test.go b/go/git/repograph/local_test.go
index 321f1bb..3a823a6 100644
--- a/go/git/repograph/local_test.go
+++ b/go/git/repograph/local_test.go
@@ -57,6 +57,13 @@
shared_tests.TestRecurseAllBranches(t, ctx, g, repo, ud)
}
+func TestLogLinearRepo(t *testing.T) {
+ unittest.MediumTest(t)
+ ctx, g, repo, ud, cleanup := setupRepo(t)
+ defer cleanup()
+ shared_tests.TestLogLinear(t, ctx, g, repo, ud)
+}
+
func TestUpdateHistoryChangedRepo(t *testing.T) {
unittest.MediumTest(t)
ctx, g, repo, ud, cleanup := setupRepo(t)
@@ -77,3 +84,10 @@
defer cleanup()
shared_tests.TestRevList(t, ctx, g, repo, ud)
}
+
+func TestBranchMembershipRepo(t *testing.T) {
+ unittest.MediumTest(t)
+ ctx, g, repo, ud, cleanup := setupRepo(t)
+ defer cleanup()
+ shared_tests.TestBranchMembership(t, ctx, g, repo, ud)
+}
diff --git a/go/git/repograph/shared_tests/shared_tests.go b/go/git/repograph/shared_tests/shared_tests.go
index f662c77..e4ee171 100644
--- a/go/git/repograph/shared_tests/shared_tests.go
+++ b/go/git/repograph/shared_tests/shared_tests.go
@@ -7,6 +7,7 @@
"os"
"path"
"sort"
+ "strings"
assert "github.com/stretchr/testify/require"
"go.skia.org/infra/go/deepequal"
@@ -214,6 +215,13 @@
assert.Equal(t, []*repograph.Commit(nil), c1.GetParents())
assert.Equal(t, []*repograph.Commit{c2}, c3.GetParents())
+ // Assert that each of the commits has the correct index.
+ assert.Equal(t, 0, c1.Index)
+ assert.Equal(t, 1, c2.Index)
+ assert.Equal(t, 2, c3.Index)
+ assert.Equal(t, 2, c4.Index)
+ assert.Equal(t, 3, c5.Index)
+
// Ensure that we can start in an empty dir and check out from scratch properly.
tmp2, err := ioutil.TempDir("", "")
assert.NoError(t, err)
@@ -358,6 +366,50 @@
assert.True(t, gotCommits[c2])
}
+func TestLogLinear(t sktest.TestingT, ctx context.Context, g *git_testutils.GitBuilder, repo *repograph.Graph, rf RepoImplRefresher) {
+ commits := GitSetup(t, ctx, g, repo, rf)
+
+ c1 := commits[0]
+ c2 := commits[1]
+ c3 := commits[2]
+ c4 := commits[3]
+ c5 := commits[4]
+
+ gitdir := git.GitDir(g.Dir())
+ test := func(from, to string, checkAgainstGit bool, expect ...*repograph.Commit) {
+ if checkAgainstGit {
+ // Ensure that our expectations match actual git results.
+ cmd := []string{"--first-parent"}
+ if from == "" {
+ cmd = append(cmd, to)
+ } else {
+ cmd = append(cmd, "--ancestry-path", fmt.Sprintf("%s..%s", from, to))
+ }
+ hashes, err := gitdir.RevList(ctx, cmd...)
+ assert.NoError(t, err)
+ assert.Equal(t, len(hashes), len(expect))
+ for i, h := range hashes {
+ assert.Equal(t, h, expect[i].Hash)
+ }
+ }
+
+ // Ensure that we get the expected results from the Graph.
+ actual, err := repo.LogLinear(from, to)
+ assert.NoError(t, err)
+ deepequal.AssertDeepEqual(t, expect, actual)
+ }
+
+ // Get the full linear history from c5.
+ test("", c5.Hash, true, c5, c4, c2, c1)
+ // Get the linear history from c1 to c5. Like "git log", we don't
+ // include the "from" commit in the results.
+ test(c1.Hash, c5.Hash, true, c5, c4, c2)
+ // c3 is not reachable via first-parents from c5. For some reason, git
+ // actually returns c5.Hash, even though c5 has c4 as its first parent.
+ // Ignore the check against git in this case.
+ test(c3.Hash, c5.Hash, false)
+}
+
func TestUpdateHistoryChanged(t sktest.TestingT, ctx context.Context, g *git_testutils.GitBuilder, repo *repograph.Graph, rf RepoImplRefresher) {
commits := GitSetup(t, ctx, g, repo, rf)
@@ -671,3 +723,87 @@
check(commits[2], commits[4], commits[3:5])
check(commits[3], commits[4], []string{commits[2], commits[4]})
}
+
+func TestBranchMembership(t sktest.TestingT, ctx context.Context, gb *git_testutils.GitBuilder, repo *repograph.Graph, rf RepoImplRefresher) {
+ commits := GitSetup(t, ctx, gb, repo, rf)
+ c1 := commits[0]
+ c2 := commits[1]
+ c3 := commits[2]
+ c4 := commits[3]
+ c5 := commits[4]
+ test := func(c *repograph.Commit, branches ...string) {
+ sklog.Errorf("%d: %s", c.Index, c.Hash)
+ sklog.Errorf("Expect:\n%s", strings.Join(branches, "\n"))
+ actualStr := ""
+ for branch := range c.Branches {
+ actualStr += branch + "\n"
+ }
+ sklog.Errorf("Actual:\n%s", actualStr)
+ assert.Equal(t, len(branches), len(c.Branches))
+ for _, b := range branches {
+ assert.True(t, c.Branches[b])
+ }
+ }
+
+ // Branches should be nil at first.
+ for _, c := range commits {
+ assert.Nil(t, c.Branches)
+ }
+
+ // Enable branch tracking. Ensure that all commits were updated with the
+ // correct branch membership.
+ repo.EnableBranchTracking()
+ test(c1, "master", "branch2")
+ test(c2, "master", "branch2")
+ test(c3, "branch2") // c3 is reachable from master, but not via first-parent.
+ test(c4, "master")
+ test(c5, "master")
+
+ // Ensure that subsequent calls to Update() cause the branch membership
+ // to be updated.
+ gb.CreateBranchTrackBranch(ctx, "b3", "master")
+ rf.Refresh()
+ assert.NoError(t, repo.Update(ctx))
+ test(c1, "master", "branch2", "b3")
+ test(c2, "master", "branch2", "b3")
+ test(c3, "branch2") // c3 is reachable from b3, but not via first-parent.
+ test(c4, "master", "b3")
+ test(c5, "master", "b3")
+
+ // Reset b3 to branch2.
+ gb.Reset(ctx, "--hard", "branch2")
+ rf.Refresh()
+ assert.NoError(t, repo.Update(ctx))
+ test(c1, "master", "branch2", "b3")
+ test(c2, "master", "branch2", "b3")
+ test(c3, "branch2", "b3")
+ test(c4, "master")
+ test(c5, "master")
+
+ // Delete b3. We should get the same results as before it was added.
+ gb.CheckoutBranch(ctx, "master")
+ gb.UpdateRef(ctx, "-d", "refs/heads/b3")
+ rf.Refresh()
+ assert.NoError(t, repo.Update(ctx))
+ test(c1, "master", "branch2")
+ test(c2, "master", "branch2")
+ test(c3, "branch2")
+ test(c4, "master")
+ test(c5, "master")
+
+ // Ensure that repograph.Map updates branch membership as well.
+ m := repograph.Map{"dummy": repo}
+ c6hash := gb.CommitGen(ctx, "blah")
+ c6details, err := git.GitDir(gb.Dir()).Details(ctx, c6hash)
+ assert.NoError(t, err)
+ rf.Refresh(c6details)
+ assert.NoError(t, m.Update(ctx))
+ c6 := repo.Get(c6hash)
+ assert.NotNil(t, c6)
+ test(c1, "master", "branch2")
+ test(c2, "master", "branch2")
+ test(c3, "branch2")
+ test(c4, "master")
+ test(c5, "master")
+ test(c6, "master")
+}
diff --git a/go/gitstore/bt_gitstore/repo_impl_test.go b/go/gitstore/bt_gitstore/repo_impl_test.go
index 50a695b..3494e18 100644
--- a/go/gitstore/bt_gitstore/repo_impl_test.go
+++ b/go/gitstore/bt_gitstore/repo_impl_test.go
@@ -119,6 +119,13 @@
repograph_shared_tests.TestRecurseAllBranches(t, ctx, g, repo, ud)
}
+func TestLogLinearBTGitStore(t *testing.T) {
+ unittest.LargeTest(t)
+ ctx, g, repo, ud, cleanup := setupGitStore(t)
+ defer cleanup()
+ repograph_shared_tests.TestLogLinear(t, ctx, g, repo, ud)
+}
+
func TestUpdateHistoryChangedBTGitStore(t *testing.T) {
unittest.LargeTest(t)
ctx, g, repo, ud, cleanup := setupGitStore(t)
@@ -139,3 +146,10 @@
defer cleanup()
repograph_shared_tests.TestRevList(t, ctx, g, repo, ud)
}
+
+func TestBranchMembershipBTGitStore(t *testing.T) {
+ unittest.LargeTest(t)
+ ctx, g, repo, ud, cleanup := setupGitStore(t)
+ defer cleanup()
+ repograph_shared_tests.TestBranchMembership(t, ctx, g, repo, ud)
+}
diff --git a/go/gitstore/repo_impl_test.go b/go/gitstore/repo_impl_test.go
index 596dec1..2d1233f 100644
--- a/go/gitstore/repo_impl_test.go
+++ b/go/gitstore/repo_impl_test.go
@@ -166,6 +166,13 @@
repograph_shared_tests.TestRecurseAllBranches(t, ctx, g, repo, ud)
}
+func TestLogLinearGitStore(t *testing.T) {
+ unittest.LargeTest(t)
+ ctx, g, repo, ud, cleanup := setupGitStore(t)
+ defer cleanup()
+ repograph_shared_tests.TestLogLinear(t, ctx, g, repo, ud)
+}
+
func TestUpdateHistoryChangedGitStore(t *testing.T) {
unittest.LargeTest(t)
ctx, g, repo, ud, cleanup := setupGitStore(t)
@@ -186,3 +193,10 @@
defer cleanup()
repograph_shared_tests.TestRevList(t, ctx, g, repo, ud)
}
+
+func TestBranchMembershipGitStore(t *testing.T) {
+ unittest.LargeTest(t)
+ ctx, g, repo, ud, cleanup := setupGitStore(t)
+ defer cleanup()
+ repograph_shared_tests.TestBranchMembership(t, ctx, g, repo, ud)
+}
diff --git a/go/vcsinfo/types.go b/go/vcsinfo/types.go
index 593ff23..857848e 100644
--- a/go/vcsinfo/types.go
+++ b/go/vcsinfo/types.go
@@ -16,9 +16,9 @@
MaxTime = time.Unix(9999999999, 0)
)
-// IndexCommit is information about a commit that includes the offset from
-// the first commit in the repository. The first commit in the branch has Index 0.
-// Usually the indexing makes most sense the commits on a branch in first-parent order.
+// IndexCommit is information about a commit that includes the commit's index
+// in the linear ancestry path obtained by following each commit's first parent
+// backward in history. The first commit in a given branch has Index 0.
type IndexCommit struct {
Hash string
Index int
@@ -35,10 +35,16 @@
// LongCommit gives more detailed information about a commit.
type LongCommit struct {
*ShortCommit
- Parents []string `json:"parent"`
- Body string `json:"body"`
- Timestamp time.Time `json:"timestamp"`
- Branches map[string]bool `json:"-"`
+ Parents []string `json:"parent"`
+ Body string `json:"body"`
+ Timestamp time.Time `json:"timestamp"`
+ // Index is this Commit's index in the linear ancestry path obtained by
+ // following each commit's first parent backward in history. The first
+ // commit in a given branch has Index 0. This field is not set by
+ // default.
+ Index int `json:"-"`
+ // Branches indicates which branches can reach this commit.
+ Branches map[string]bool `json:"-"`
}
func NewLongCommit() *LongCommit {