| package repograph |
| |
| /* |
| The repograph package provides an in-memory representation of an entire Git repo. |
| */ |
| |
| import ( |
| "context" |
| "encoding/gob" |
| "errors" |
| "fmt" |
| "io" |
| "sort" |
| "sync" |
| "time" |
| |
| "github.com/willf/bitset" |
| |
| "go.skia.org/infra/go/git" |
| "go.skia.org/infra/go/metrics2" |
| "go.skia.org/infra/go/skerr" |
| "go.skia.org/infra/go/sklog" |
| "go.skia.org/infra/go/timer" |
| "go.skia.org/infra/go/util" |
| "go.skia.org/infra/go/vcsinfo" |
| ) |
| |
| var ( |
| ErrStopRecursing = errors.New("Stop recursing") |
| ) |
| |
| // Commit represents a commit in a Git repo. |
| type Commit struct { |
| *vcsinfo.LongCommit |
| parents []*Commit |
| } |
| |
| // GetParents returns the parents of this commit. |
| func (c *Commit) GetParents() []*Commit { |
| return c.parents |
| } |
| |
| // AddParent adds the given commit to the list of parents. |
| func (c *Commit) AddParent(p *Commit) { |
| c.parents = append(c.parents, p) |
| } |
| |
| // Recurse runs the given function recursively over commit history, starting |
| // at the given commit. The function accepts the current Commit as a parameter. |
| // Returning ErrStopRecursing from the function indicates that recursion should |
| // stop for the current branch, however, recursion will continue for any other |
| // branches until they are similarly terminated. Returning any other error |
| // causes recursion to stop without properly terminating other branches. The |
| // error will bubble to the top and be returned. Here's an example of printing |
| // out the entire ancestry of a given commit: |
| // |
| // commit.Recurse(func(c *Commit) error { |
| // sklog.Info(c.Hash) |
| // return nil |
| // }) |
| // |
| // If the passed-in function returns no errors other than ErrStopRecursing, |
| // Recurse() will never return an error. |
| func (c *Commit) Recurse(f func(*Commit) error) error { |
| return c.recurse(f, make(map[*Commit]bool, c.Index+1)) |
| } |
| |
| // recurse is a helper function used by Recurse. |
| func (c *Commit) recurse(f func(*Commit) error, visited map[*Commit]bool) error { |
| // For large repos, we may not have enough stack space to recurse |
| // through the whole commit history. Since most commits only have |
| // one parent, avoid recursion when possible. |
| for { |
| visited[c] = true |
| if err := f(c); err == ErrStopRecursing { |
| return nil |
| } else if err != nil { |
| return err |
| } |
| if len(c.parents) == 0 { |
| return nil |
| } else if len(c.parents) == 1 { |
| p := c.parents[0] |
| if visited[p] { |
| return nil |
| } |
| c = p |
| } else { |
| break |
| } |
| } |
| if len(c.Parents) > 1 { |
| for _, p := range c.parents { |
| if visited[p] { |
| continue |
| } |
| if err := p.recurse(f, visited); err != nil { |
| return err |
| } |
| } |
| } |
| return nil |
| } |
| |
| // RecurseFirstParent is like Recurse, but it only follows the first parent of |
| // each commit. Like Recurse, if the passed-in function returns no errors other |
| // than ErrStopRecursing, RecurseFirstParent() will never return an error. |
| func (c *Commit) RecurseFirstParent(f func(*Commit) error) error { |
| for { |
| if err := f(c); err == ErrStopRecursing { |
| return nil |
| } else if err != nil { |
| return err |
| } |
| if len(c.parents) == 0 { |
| return nil |
| } else { |
| c = c.parents[0] |
| } |
| } |
| } |
| |
| // AllCommits returns the hashes of all commits reachable from this Commit, in |
| // reverse topological order. |
| func (c *Commit) AllCommits() ([]string, error) { |
| commits := make(map[*Commit]bool, c.Index+1) |
| if err := c.recurse(func(c *Commit) error { |
| return nil |
| }, commits); err != nil { |
| return nil, err |
| } |
| |
| // Topologically sort the commits. |
| sorted := topologicalSortHelper(commits) |
| return CommitSlice(sorted).Hashes(), nil |
| } |
| |
| // HasAncestor returns true iff the given commit is an ancestor of this commit. |
| func (c *Commit) HasAncestor(other string) bool { |
| found := false |
| if err := c.Recurse(func(commit *Commit) error { |
| if commit.Hash == other { |
| found = true |
| return ErrStopRecursing |
| } |
| return nil |
| }); err != nil { |
| // Our function doesn't return an error, so we shouldn't hit |
| // this case. |
| sklog.Errorf("Error in Commit.Recurse: %s", err) |
| } |
| return found |
| } |
| |
| // Less compares this Commit to the other Commit and returns true if it is |
| // considered "less" than the other. This is used as a helper function in |
| // sorting to ensure stability. Uses the timestamp as the primary sort function |
| // and the hash as a tie breaker. Note that, because we sort in reverse |
| // chronological order, this has the opposite meaning that it seems it should. |
| func (c *Commit) Less(other *Commit) bool { |
| if c.Timestamp.Equal(other.Timestamp) { |
| // If the timestamps are equal, just use the commit hash as an |
| // arbitrary way of comparing the two. |
| return c.Hash > other.Hash |
| } |
| return c.Timestamp.After(other.Timestamp) |
| } |
| |
| // Helpers for sorting. |
| type CommitSlice []*Commit |
| |
| func (s CommitSlice) Len() int { return len(s) } |
| func (s CommitSlice) Less(a, b int) bool { return s[a].Less(s[b]) } |
| func (s CommitSlice) Swap(a, b int) { s[a], s[b] = s[b], s[a] } |
| |
| // Hashes returns a slice of commit hashes corresponding to the commits in s. |
| func (s CommitSlice) Hashes() []string { |
| rv := make([]string, 0, len(s)) |
| for _, c := range s { |
| rv = append(rv, c.Hash) |
| } |
| return rv |
| } |
| |
| // RepoImpl provides methods for interacting with a git repo, agnostic of how the |
| // repo is actually accessed. It is used when updating a Graph. It should not be |
| // used concurrently. |
| type RepoImpl interface { |
| // Update the local view of the repo. |
| Update(context.Context) error |
| |
| // Return the details for the given commit. The Graph takes ownership |
| // of the returned LongCommit and may modify it. |
| Details(context.Context, string) (*vcsinfo.LongCommit, error) |
| |
| // Return the branch heads, as of the last call to Update(). |
| Branches(context.Context) ([]*git.Branch, error) |
| |
| // UpdateCallback is a function which is called after the Graph is |
| // updated but before the changes are committed. If UpdateCallback |
| // returns an error, the changes are not committed. This allows the |
| // RepoImpl to perform caching, for example. The parameters are the |
| // list of commits which were added during the current Update, the |
| // list of commits which were removed, and the new state of the Graph. |
| UpdateCallback(context.Context, []*vcsinfo.LongCommit, []*vcsinfo.LongCommit, *Graph) error |
| } |
| |
| // Graph represents an entire Git repo. |
| type Graph struct { |
| branches []*git.Branch |
| commits map[string]*Commit |
| graphMtx sync.RWMutex |
| |
| updateMtx sync.Mutex |
| repoImpl RepoImpl |
| } |
| |
| // NewWithRepoImpl returns a Graph instance which uses the given RepoImpl. The |
| // RepoImpl should be initialized (via Update() or otherwise) before being |
| // passed to NewWithRepoImpl. |
| func NewWithRepoImpl(ctx context.Context, ri RepoImpl) (*Graph, error) { |
| rv := &Graph{ |
| commits: map[string]*Commit{}, |
| repoImpl: ri, |
| } |
| if _, _, err := rv.updateFrom(ctx, rv.repoImpl); err != nil { |
| return nil, err |
| } |
| return rv, nil |
| } |
| |
| // gobGraph is a utility struct used for serializing a Graph using gob. |
| type gobGraph struct { |
| Branches []*git.Branch |
| Commits map[string]*vcsinfo.LongCommit |
| } |
| |
| // NewFromGob reads a Graph from the given Reader in gob format. |
| func NewFromGob(ctx context.Context, r io.Reader, ri RepoImpl) (*Graph, error) { |
| var g gobGraph |
| if err := gob.NewDecoder(r).Decode(&g); err != nil { |
| return nil, skerr.Wrapf(err, "Failed to decode Graph from gob.") |
| } |
| // Temporarily use an in-memory RepoImpl to build the Graph. |
| mem := NewMemCacheRepoImpl(g.Commits, g.Branches) |
| rv := &Graph{ |
| commits: make(map[string]*Commit, len(g.Commits)), |
| repoImpl: mem, |
| } |
| if err := rv.Update(ctx); err != nil { |
| return nil, skerr.Wrapf(err, "Failed to initialize Graph from Gob") |
| } |
| // Swap to the passed-in RepoImpl. |
| rv.repoImpl = ri |
| return rv, nil |
| } |
| |
| // WriteGob writes the Graph to the given Writer using gob format. |
| func (r *Graph) WriteGob(w io.Writer) error { |
| r.graphMtx.RLock() |
| defer r.graphMtx.RUnlock() |
| commits := make(map[string]*vcsinfo.LongCommit, len(r.commits)) |
| for hash, c := range r.commits { |
| commits[hash] = c.LongCommit |
| } |
| return gob.NewEncoder(w).Encode(gobGraph{ |
| Branches: r.branches, |
| Commits: commits, |
| }) |
| } |
| |
| // Len returns the number of commits in the repo. |
| func (r *Graph) Len() int { |
| r.graphMtx.RLock() |
| defer r.graphMtx.RUnlock() |
| return len(r.commits) |
| } |
| |
| // UpdateBranchInfo updates the membership of all commits in all branches. A |
| // commit is considered a member of a branch if and only if it is reachable by |
| // tracing the *first parent* lineage backward from the branch head. Notably, |
| // this will result in different branch membership than would be obtained by |
| // `git branch --all --list --contains <hash>` as is used elsewhere. Modifies |
| // the Commit objects stored in the graph and returns the LongCommits associated |
| // with the modified Commits. Clients MUST NOT modify the Branches on the |
| // returned LongCommits; as a space-saving optimization, we use the same map |
| // instance for all commits which belong to the same set of branches. |
| func (r *Graph) UpdateBranchInfo() []*vcsinfo.LongCommit { |
| sklog.Infof("UpdateBranchInfo") |
| defer timer.New("UpdateBranchInfo").Stop() |
| branches := r.BranchHeads() |
| commits := r.GetAll() |
| |
| // Use a map of Commit pointers to BitSets. Pointers are a little faster |
| // than keying by commit hash and is safe because our commit pointers |
| // never change (the parent pointers rely on this behavior). The BitSet |
| // indicates which branch (by index in the branches slice) the commit |
| // belongs to. |
| membership := make(map[*Commit]*bitset.BitSet, len(commits)) |
| for _, c := range commits { |
| membership[c] = bitset.New(uint(len(branches))) |
| } |
| for idx, branch := range branches { |
| // Our func never returns an error, so we don't need to check |
| // the return value here. |
| _ = commits[branch.Head].RecurseFirstParent(func(c *Commit) error { |
| membership[c].Set(uint(idx)) |
| return nil |
| }) |
| } |
| total := int64(0) |
| rv := []*vcsinfo.LongCommit{} |
| // branchMaps stores all of the combinations of branch memberships |
| // for all commits, keyed by bitset in string form. |
| branchMaps := map[string]map[string]bool{} |
| for _, c := range commits { |
| bs := membership[c] |
| if bs != nil { |
| key := bs.String() |
| branchMap, ok := branchMaps[key] |
| if !ok { |
| branchMap = make(map[string]bool, bs.Count()) |
| for idx, branch := range branches { |
| if bs.Test(uint(idx)) { |
| branchMap[branch.Name] = true |
| } |
| } |
| branchMaps[key] = branchMap |
| } |
| if !util.StringSet(c.Branches).Equals(branchMap) { |
| rv = append(rv, c.LongCommit) |
| c.Branches = branchMap |
| } |
| } else if len(c.Branches) != 0 { |
| c.Branches = nil |
| rv = append(rv, c.LongCommit) |
| } |
| total += int64(len(c.Branches)) |
| } |
| return rv |
| } |
| |
| // addCommit adds the given commit to the Graph. Requires that the commit's |
| // parents are already in the Graph. Assumes that the caller holds r.graphMtx. |
| func (r *Graph) addCommit(lc *vcsinfo.LongCommit) error { |
| defer metrics2.FuncTimer().Stop() |
| var parents []*Commit |
| if len(lc.Parents) > 0 { |
| for _, h := range lc.Parents { |
| if h == "" { |
| continue |
| } |
| p, ok := r.commits[h] |
| if !ok { |
| return fmt.Errorf("repograph.Graph: Could not find parent commit %q", h) |
| } |
| parents = append(parents, p) |
| if lc.Index == 0 { |
| lc.Index = p.Index + 1 |
| } |
| } |
| } |
| |
| c := &Commit{ |
| LongCommit: lc, |
| parents: parents, |
| } |
| // Ignore any previously-set branch information. If desired, the client |
| // can call UpdateBranchInfo. |
| c.Branches = nil |
| r.commits[c.Hash] = c |
| return nil |
| } |
| |
| // updateFrom updates the Graph using the given RepoImpl and returns the lists of |
| // new and deleted commits, or any error which occurred. |
| func (r *Graph) updateFrom(ctx context.Context, ri RepoImpl) ([]*vcsinfo.LongCommit, []*vcsinfo.LongCommit, error) { |
| // Retrieve the new commits. |
| |
| // Obtain the list of branches. |
| newBranchesList, err := ri.Branches(ctx) |
| if err != nil { |
| return nil, nil, fmt.Errorf("Failed to obtain branch list from RepoImpl: %s", err) |
| } |
| newBranchesMap := make(map[string]string, len(newBranchesList)) |
| for _, branch := range newBranchesList { |
| newBranchesMap[branch.Name] = branch.Head |
| } |
| sort.Sort(git.BranchList(newBranchesList)) |
| r.graphMtx.Lock() |
| defer r.graphMtx.Unlock() |
| oldBranchesMap := make(map[string]string, len(r.branches)) |
| for _, branch := range r.branches { |
| oldBranchesMap[branch.Name] = branch.Head |
| } |
| |
| // Load new commits. |
| var newCommits []*vcsinfo.LongCommit |
| |
| needOrphanCheck := false |
| for _, branch := range newBranchesList { |
| newHead := newBranchesMap[branch.Name] |
| oldHead := oldBranchesMap[branch.Name] |
| |
| // Shortcut: if the branch is up-to-date, skip it. |
| if newHead == oldHead { |
| continue |
| } |
| |
| // Trace back in time from the new branch head until we find the |
| // old branch head, any other commit we already have, or a |
| // commit with no parents. |
| toProcess := map[string]bool{newHead: true} |
| for len(toProcess) > 0 { |
| // Choose a commit to process. |
| var c string |
| for commit := range toProcess { |
| c = commit |
| break |
| } |
| delete(toProcess, c) |
| |
| // If we've seen this commit before, we're done. |
| if c == oldHead { |
| continue |
| } |
| if _, ok := r.commits[c]; ok { |
| if oldHead != "" { |
| // If we found a previously-known commit before |
| // we found the old branch head, then history |
| // has changed and we need to run the orphan |
| // check. |
| sklog.Warningf("Found existing commit %s before old branch head %s. Need to perform orphan check.", c, oldHead) |
| needOrphanCheck = true |
| } |
| continue |
| } |
| |
| // We haven't seen this commit before; add it to newCommits. |
| details, err := ri.Details(ctx, c) |
| if err != nil { |
| return nil, nil, fmt.Errorf("Failed to Get commit details from RepoImpl: %s", err) |
| } |
| newCommits = append(newCommits, details) |
| |
| // Add the commit's parent(s) to the toProcess map. |
| for _, p := range details.Parents { |
| toProcess[p] = true |
| } |
| if len(details.Parents) == 0 && oldHead != "" { |
| // If we found a commit with no parents and this |
| // is not a new branch, then we've discovered a |
| // completely new line of history and need to |
| // check whether the commits on the old line are |
| // now orphaned. |
| sklog.Warningf("Found commit %s with no parents which we haven't seen before.", c) |
| needOrphanCheck = true |
| } |
| } |
| } |
| |
| // Add newCommits in reverse order to ensure that all parents are added |
| // before their children. |
| addedCommits := make([]*vcsinfo.LongCommit, 0, len(newCommits)) |
| for i := len(newCommits) - 1; i >= 0; i-- { |
| commit := newCommits[i] |
| if _, ok := r.commits[commit.Hash]; !ok { |
| if err := r.addCommit(commit); err != nil { |
| return nil, nil, fmt.Errorf("Failed to add commit: %s", err) |
| } |
| addedCommits = append(addedCommits, commit) |
| } |
| } |
| |
| if !needOrphanCheck { |
| // Check to see whether any branches were deleted. |
| for branch := range oldBranchesMap { |
| if _, ok := newBranchesMap[branch]; !ok { |
| sklog.Warningf("Branch %s was deleted; need to check for orphaned commits.", branch) |
| needOrphanCheck = true |
| break |
| } |
| } |
| } |
| var removedCommits []*vcsinfo.LongCommit |
| if needOrphanCheck { |
| sklog.Warningf("History change detected; checking for orphaned commits.") |
| visited := make(map[*Commit]bool, len(r.commits)) |
| for _, newBranchHead := range newBranchesMap { |
| // Not using Get() because graphMtx is locked. |
| if err := r.commits[newBranchHead].recurse(func(c *Commit) error { |
| return nil |
| }, visited); err != nil { |
| return nil, nil, fmt.Errorf("Failed to trace commit history during orphan check: %s", err) |
| } |
| } |
| for hash, c := range r.commits { |
| if !visited[c] { |
| sklog.Warningf("Commit %s is orphaned. Removing from the Graph.", hash) |
| delete(r.commits, hash) |
| removedCommits = append(removedCommits, c.LongCommit) |
| } |
| } |
| } |
| |
| // Update the rest of the Graph. |
| r.branches = newBranchesList |
| return addedCommits, removedCommits, nil |
| } |
| |
| // update the Graph, returning any added and removed commits, and run the given |
| // callback function on the Graph before the changes are committed. If the |
| // callback returns an error, the changes are not committed. |
| func (r *Graph) update(ctx context.Context, cb func(*Graph) error) ([]*vcsinfo.LongCommit, []*vcsinfo.LongCommit, error) { |
| r.updateMtx.Lock() |
| defer r.updateMtx.Unlock() |
| defer metrics2.FuncTimer().Stop() |
| |
| // Update the RepoImpl. |
| if err := r.repoImpl.Update(ctx); err != nil { |
| return nil, nil, fmt.Errorf("Failed RepoImpl.Update(): %s", err) |
| } |
| |
| newGraph := r.shallowCopy() |
| added, removed, err := newGraph.updateFrom(ctx, r.repoImpl) |
| if err != nil { |
| return nil, nil, err |
| } |
| if cb != nil { |
| if err := cb(newGraph); err != nil { |
| return nil, nil, err |
| } |
| } |
| if err := r.repoImpl.UpdateCallback(ctx, added, removed, newGraph); err != nil { |
| return nil, nil, err |
| } |
| r.graphMtx.Lock() |
| defer r.graphMtx.Unlock() |
| r.branches = newGraph.branches |
| r.commits = newGraph.commits |
| sklog.Infof("Graph update finished; have %d commits, added %d, removed %d", len(r.commits), len(added), len(removed)) |
| return added, removed, nil |
| } |
| |
| // Update the Graph. |
| func (r *Graph) Update(ctx context.Context) error { |
| _, _, err := r.update(ctx, nil) |
| return err |
| } |
| |
| // UpdateAndReturnCommitDiffs updates the Graph and returns the added and |
| // removed commits, in arbitrary order. |
| func (r *Graph) UpdateAndReturnCommitDiffs(ctx context.Context) ([]*vcsinfo.LongCommit, []*vcsinfo.LongCommit, error) { |
| return r.update(ctx, nil) |
| } |
| |
| // UpdateWithCallback updates the Graph and runs the given function before the |
| // changes are committed. If the function returns an error, the changes are not |
| // committed. The function is allowed to call non-Update methods of the Graph. |
| func (r *Graph) UpdateWithCallback(ctx context.Context, cb func(*Graph) error) error { |
| _, _, err := r.update(ctx, cb) |
| return err |
| } |
| |
| // Branches returns the list of known branches in the repo. |
| func (r *Graph) Branches() []string { |
| r.graphMtx.RLock() |
| defer r.graphMtx.RUnlock() |
| rv := make([]string, 0, len(r.branches)) |
| for _, b := range r.branches { |
| rv = append(rv, b.Name) |
| } |
| return rv |
| } |
| |
| // BranchHeads returns the set of branch heads from the repo. |
| func (r *Graph) BranchHeads() []*git.Branch { |
| branches := r.Branches() |
| branchHeads := make([]*git.Branch, 0, len(branches)) |
| for _, b := range branches { |
| branchHeads = append(branchHeads, &git.Branch{ |
| Name: b, |
| Head: r.Get(b).Hash, |
| }) |
| } |
| return branchHeads |
| } |
| |
| // shallowCopy() returns a shallow copy of the Graph, ie. the pointers in the |
| // old and new Graphs will remain equal for a given Commit. |
| func (r *Graph) shallowCopy() *Graph { |
| r.graphMtx.RLock() |
| defer r.graphMtx.RUnlock() |
| newCommits := make(map[string]*Commit, len(r.commits)) |
| for k, v := range r.commits { |
| newCommits[k] = v |
| } |
| newBranches := make([]*git.Branch, 0, len(r.branches)) |
| for _, branch := range r.branches { |
| newBranches = append(newBranches, &git.Branch{ |
| Head: branch.Head, |
| Name: branch.Name, |
| }) |
| } |
| return &Graph{ |
| branches: newBranches, |
| commits: newCommits, |
| } |
| } |
| |
| // Get returns a Commit object for the given ref, if such a commit exists. This |
| // function does not understand complex ref types (eg. HEAD~3); only branch |
| // names and full commit hashes are accepted. |
| func (r *Graph) Get(ref string) *Commit { |
| r.graphMtx.RLock() |
| defer r.graphMtx.RUnlock() |
| if c, ok := r.commits[ref]; ok { |
| return c |
| } |
| for _, b := range r.branches { |
| if ref == b.Name { |
| if c, ok := r.commits[b.Head]; ok { |
| return c |
| } |
| } |
| } |
| return nil |
| } |
| |
| // GetAll returns a map containing all of the stored Commit objects keyed by |
| // commit hash. |
| func (r *Graph) GetAll() map[string]*Commit { |
| r.graphMtx.RLock() |
| defer r.graphMtx.RUnlock() |
| rv := make(map[string]*Commit, len(r.commits)) |
| for k, v := range r.commits { |
| rv[k] = v |
| } |
| return rv |
| } |
| |
| // RecurseCommits runs the given function recursively over the given refs, which |
| // can be either commit hashes or branch names. The function accepts the current |
| // Commit as a parameter. Returning ErrStopRecursing from the function indicates |
| // that recursion should stop for the current branch, however, recursion will |
| // continue for any other branches until they are similarly terminated. |
| // Returning any other error causes recursion to stop without properly |
| // terminating other branches. The error will bubble to the top and be returned. |
| // Here's an example of printing out all of the commits reachable from a given |
| // set of commits: |
| // |
| // commits := []string{...} |
| // |
| // err := repo.RecurseCommits(commits, func(c *Commit) error { |
| // sklog.Info(c.Hash) |
| // return nil |
| // }) |
| func (r *Graph) RecurseCommits(commits []string, f func(*Commit) error) error { |
| visited := make(map[*Commit]bool, r.Len()) |
| for _, hash := range commits { |
| c := r.Get(hash) |
| if c == nil { |
| return fmt.Errorf("Unknown commit %q", hash) |
| } |
| if !visited[c] { |
| if err := c.recurse(f, visited); err != nil { |
| return err |
| } |
| } |
| } |
| return nil |
| } |
| |
| // RecurseAllBranches runs the given function recursively over the entire commit |
| // history, starting at each of the known branch heads. The function accepts the |
| // current Commit as a parameter. Returning ErrStopRecursing from the function |
| // indicates that recursion should stop for the current branch, however, |
| // recursion will continue for any other branches until they are similarly |
| // terminated. Returning any other error causes recursion to stop without |
| // properly terminating other branches. The error will bubble to the top and be |
| // returned. Here's an example of printing out all of the commits in the repo: |
| // |
| // repo.RecurseAllBranches(func(c *Commit) error { |
| // sklog.Info(c.Hash) |
| // return nil |
| // }) |
| func (r *Graph) RecurseAllBranches(f func(*Commit) error) error { |
| return r.RecurseCommits(r.Branches(), f) |
| } |
| |
| // RevList is the equivalent of "git rev-list --topo-order from..to". |
| // Each argument is a commit hash or a branch name. The commits are returned |
| // in reverse topological order. Per "git rev-list" docs, the returned commit |
| // hashes consist of all commits reachable from "to" and not reachable from |
| // "from". In the typical case of linear git history, this just means the |
| // commits on the line after "from" up through "to", but with branches and |
| // merges it's possible that there are entire sub-graphs which are reachable |
| // from "to" but not "from". |
| func (r *Graph) RevList(from, to string) ([]string, error) { |
| fromCommit := r.Get(from) |
| if fromCommit == nil { |
| return nil, fmt.Errorf("Unknown commit %q", from) |
| } |
| toCommit := r.Get(to) |
| if toCommit == nil { |
| return nil, fmt.Errorf("Unknown commit %q", to) |
| } |
| // Shortcut: if the commits are the same, return now. |
| if fromCommit == toCommit { |
| return []string{}, nil |
| } |
| |
| // Find all of the excluded commits. |
| exclude := make(map[*Commit]bool, r.Len()) |
| if err := fromCommit.recurse(func(c *Commit) error { |
| return nil |
| }, exclude); err != nil { |
| return nil, err |
| } |
| |
| // Find the included commits. |
| include := make(map[*Commit]bool, r.Len()) |
| if err := toCommit.recurse(func(c *Commit) error { |
| if exclude[c] { |
| return ErrStopRecursing |
| } |
| return nil |
| }, include); err != nil { |
| return nil, err |
| } |
| |
| // include may contain some commits from the exclude map; remove them. |
| for c := range include { |
| if exclude[c] { |
| delete(include, c) |
| } |
| } |
| |
| // Topologically sort the commits. |
| sorted := topologicalSortHelper(include) |
| return CommitSlice(sorted).Hashes(), nil |
| } |
| |
| // TopologicalSort returns a slice containing the given commits in reverse |
| // topological order, ie. every commit is listed before any of its parents. |
| func TopologicalSort(commits []*Commit) []*Commit { |
| commitsMap := make(map[*Commit]bool, len(commits)) |
| for _, c := range commits { |
| commitsMap[c] = true |
| } |
| return topologicalSortHelper(commitsMap) |
| } |
| |
| // Helper function for TopologicalSort; the caller provides the map, which is |
| // modified by topologicalSortHelper. |
| func topologicalSortHelper(commits map[*Commit]bool) []*Commit { |
| // children maps each commit to those commits which have it as a parent. |
| children := make(map[*Commit]map[*Commit]bool, len(commits)) |
| for c := range commits { |
| for _, p := range c.parents { |
| if commits[p] { |
| subMap, ok := children[p] |
| if !ok { |
| subMap = map[*Commit]bool{} |
| children[p] = subMap |
| } |
| subMap[c] = true |
| } |
| } |
| } |
| |
| // Sort the commits topologically. |
| rv := make([]*Commit, 0, len(commits)) |
| followBranch := func(c *Commit) { |
| for len(children[c]) == 0 { |
| // Add this commit to rv. |
| rv = append(rv, c) |
| delete(commits, c) |
| |
| // Remove this commit from its parents' children, so |
| // that they can be processed. |
| for _, p := range c.parents { |
| if commits[p] { |
| delete(children[p], c) |
| } |
| } |
| |
| // Find a parent to process next. |
| var next *Commit |
| for _, p := range c.parents { |
| if commits[p] && len(children[p]) == 0 { |
| // We are ready to process this parent. |
| if next == nil || p.Less(next) { |
| next = p |
| } |
| } |
| } |
| if next != nil { |
| c = next |
| } else { |
| // None of this commit's parents are ready; we can't do |
| // any more. |
| return |
| } |
| } |
| } |
| for len(commits) > 0 { |
| var next *Commit |
| for commit := range commits { |
| if len(children[commit]) == 0 { |
| // We are ready to process this commit. |
| if next == nil || commit.Less(next) { |
| next = commit |
| } |
| } |
| } |
| if next == nil { |
| sklog.Error("No commits are ready to process!") |
| // Return so that we don't loop forever. |
| return rv |
| } |
| followBranch(next) |
| } |
| return rv |
| } |
| |
| // LogLinear is equivalent to "git log --first-parent --ancestry-path from..to", |
| // ie. it only returns commits which are on the direct path from A to B, and |
| // only on the "main" branch. This is as opposed to "git log from..to" which |
| // returns all commits which are ancestors of 'to' but not 'from'. The 'from' |
| // commit may be the empty string, in which case all commits in the first-parent |
| // line are returned. |
| func (r *Graph) LogLinear(from, to string) ([]*Commit, error) { |
| fromCommit := r.Get(from) |
| if fromCommit == nil && from != "" { |
| return nil, skerr.Fmt("No such commit %q", from) |
| } |
| toCommit := r.Get(to) |
| if toCommit == nil { |
| return nil, skerr.Fmt("No such commit %q", to) |
| } |
| rv := []*Commit{} |
| found := false |
| _ = toCommit.RecurseFirstParent(func(c *Commit) error { |
| if c == fromCommit { |
| found = true |
| return ErrStopRecursing |
| } |
| rv = append(rv, c) |
| return nil |
| }) |
| if !found && from != "" { |
| // If the "from" commit was not found, then there is no ancestry |
| // path from "from" to "to". |
| return nil, nil |
| } |
| return rv, nil |
| } |
| |
| // IsAncestor returns true iff A is an ancestor of B, where A and B are either |
| // commit hashes or branch names. |
| func (r *Graph) IsAncestor(a, b string) (bool, error) { |
| aCommit := r.Get(a) |
| if aCommit == nil { |
| return false, fmt.Errorf("No such commit %q", a) |
| } |
| bCommit := r.Get(b) |
| if bCommit == nil { |
| return false, fmt.Errorf("No such commit %q", b) |
| } |
| return bCommit.HasAncestor(aCommit.Hash), nil |
| } |
| |
| // Return any commits at or after the given timestamp. |
| func (r *Graph) GetCommitsNewerThan(ts time.Time) ([]*vcsinfo.LongCommit, error) { |
| commits := []*Commit{} |
| if err := r.RecurseAllBranches(func(c *Commit) error { |
| if !c.Timestamp.Before(ts) { |
| commits = append(commits, c) |
| return nil |
| } |
| return ErrStopRecursing |
| }); err != nil { |
| return nil, err |
| } |
| |
| // Sort the commits by timestamp, most recent first. |
| sort.Sort(CommitSlice(commits)) |
| |
| rv := make([]*vcsinfo.LongCommit, 0, len(commits)) |
| for _, c := range commits { |
| rv = append(rv, c.LongCommit) |
| } |
| return rv, nil |
| } |
| |
| // GetLastNCommits returns the last N commits in the repo. |
| func (r *Graph) GetLastNCommits(n int) ([]*vcsinfo.LongCommit, error) { |
| // Find the last Nth commit on the main branch, which we assume has far more |
| // commits than any other branch. |
| commit := r.Get(git.MainBranch) |
| if commit == nil { |
| commit = r.Get(git.MasterBranch) |
| } |
| if commit == nil { |
| if len(r.branches) > 0 { |
| commit = r.Get(r.branches[0].Head) |
| } |
| } |
| for i := 0; i < n-1; i++ { |
| p := commit.GetParents() |
| if len(p) < 1 { |
| // Cut short if we've hit the beginning of history. |
| break |
| } |
| commit = p[0] |
| } |
| commits, err := r.GetCommitsNewerThan(commit.Timestamp) |
| if err != nil { |
| return nil, err |
| } |
| |
| // Return the most-recent N commits. |
| if n > len(commits) { |
| n = len(commits) |
| } |
| return commits[:n], nil |
| } |
| |
| // Map is a convenience type for dealing with multiple Graphs for different |
| // repos. The keys are repository URLs. |
| type Map map[string]*Graph |
| |
| // update the Graphs in the Map, returning any added and removed commits, and |
| // run the given callback function on the Graphs before the changes are |
| // committed. If the callback returns an error for any Graph, the changes are |
| // not committed. |
| func (m Map) update(ctx context.Context, cb func(string, *Graph) error) (map[string][]*vcsinfo.LongCommit, map[string][]*vcsinfo.LongCommit, error) { |
| added := make(map[string][]*vcsinfo.LongCommit, len(m)) |
| removed := make(map[string][]*vcsinfo.LongCommit, len(m)) |
| newGraphs := make(map[string]*Graph, len(m)) |
| for repoUrl, r := range m { |
| r.updateMtx.Lock() |
| defer r.updateMtx.Unlock() |
| newGraph := r.shallowCopy() |
| sklog.Info("Updating RepoImpl...") |
| if err := r.repoImpl.Update(ctx); err != nil { |
| return nil, nil, skerr.Wrap(err) |
| } |
| a, r, err := newGraph.updateFrom(ctx, r.repoImpl) |
| if err != nil { |
| return nil, nil, skerr.Wrap(err) |
| } |
| added[repoUrl] = a |
| removed[repoUrl] = r |
| newGraphs[repoUrl] = newGraph |
| if cb != nil { |
| if err := cb(repoUrl, newGraph); err != nil { |
| return nil, nil, skerr.Wrap(err) |
| } |
| } |
| } |
| for repoUrl, r := range m { |
| r.graphMtx.Lock() |
| defer r.graphMtx.Unlock() |
| newGraph := newGraphs[repoUrl] |
| r.branches = newGraph.branches |
| r.commits = newGraph.commits |
| } |
| return added, removed, nil |
| } |
| |
| // Update all Graphs in the Map. |
| func (m Map) Update(ctx context.Context) error { |
| _, _, err := m.update(ctx, nil) |
| return skerr.Wrap(err) |
| } |
| |
| // UpdateAndReturnCommitDiffs updates all Graphs in the Map. Returns maps of |
| // repo URLs to slices of added commits, repo URLs to slices of removed commits, |
| // or any error which was encountered. If any Graph failed to update, no changes |
| // are committed. |
| func (m Map) UpdateAndReturnCommitDiffs(ctx context.Context) (map[string][]*vcsinfo.LongCommit, map[string][]*vcsinfo.LongCommit, error) { |
| return m.update(ctx, nil) |
| } |
| |
| // UpdateWithCallback updates the Graphs in the Map and runs the given function |
| // on each Graph before the changes are committed. If the function returns an |
| // error for any Graph, no changes are committed. The function is allowed to |
| // call non-Update methods of the Graph. |
| func (m Map) UpdateWithCallback(ctx context.Context, cb func(string, *Graph) error) error { |
| _, _, err := m.update(ctx, cb) |
| return err |
| } |
| |
| // FindCommit returns the Commit object, repo URL, and Graph object for the |
| // given commit hash if it exists in any of the Graphs in the Map and an error |
| // otherwise. |
| func (m Map) FindCommit(commit string) (*Commit, string, *Graph, error) { |
| for name, repo := range m { |
| c := repo.Get(commit) |
| if c != nil { |
| return c, name, repo, nil |
| } |
| } |
| return nil, "", nil, fmt.Errorf("Unable to find commit %s in any repo.", commit) |
| } |
| |
| // RepoURLs returns the list of repositories in the Map. |
| func (m Map) RepoURLs() []string { |
| rv := make([]string, 0, len(m)) |
| for r := range m { |
| rv = append(rv, r) |
| } |
| return rv |
| } |