| package repograph |
| |
| /* |
| The repograph package provides an in-memory representation of an entire Git repo. |
| */ |
| |
| import ( |
| "context" |
| "encoding/gob" |
| "errors" |
| "fmt" |
| "io" |
| "path" |
| "sort" |
| "sync" |
| "time" |
| |
| "go.skia.org/infra/go/git" |
| "go.skia.org/infra/go/git/gitinfo" |
| "go.skia.org/infra/go/sklog" |
| "go.skia.org/infra/go/util" |
| "go.skia.org/infra/go/vcsinfo" |
| ) |
| |
| const ( |
| // Name of the file we store inside the Git checkout to speed up the |
| // initial Update(). |
| CACHE_FILE = "sk_gitrepo.gob" |
| ) |
| |
| var ( |
| ErrStopRecursing = errors.New("Stop recursing") |
| ) |
| |
| // Commit represents a commit in a Git repo. |
| type Commit struct { |
| *vcsinfo.LongCommit |
| ParentIndices []int |
| parents []*Commit |
| repo *Graph |
| } |
| |
| // Parents returns the parents of this commit. |
| func (c *Commit) GetParents() []*Commit { |
| return c.parents |
| } |
| |
| // Recurse runs the given function recursively over commit history, starting |
| // at the given commit. The function accepts the current Commit as a parameter. |
| // Returning ErrStopRecursing from the function indicates that recursion should |
| // stop for the current branch, however, recursion will continue for any other |
| // branches until they are similarly terminated. Returning any other error |
| // causes recursion to stop without properly terminating other branches. The |
| // error will bubble to the top and be returned. Here's an example of printing |
| // out the entire ancestry of a given commit: |
| // |
| // commit.Recurse(func(c *Commit) error { |
| // sklog.Info(c.Hash) |
| // return nil |
| // }) |
| func (c *Commit) Recurse(f func(*Commit) error) error { |
| return c.recurse(f, make(map[*Commit]bool, c.repo.Len())) |
| } |
| |
| // recurse is a helper function used by Recurse. |
| func (c *Commit) recurse(f func(*Commit) error, visited map[*Commit]bool) (rvErr error) { |
| // For large repos, we may not have enough stack space to recurse |
| // through the whole commit history. Since most commits only have |
| // one parent, avoid recursion when possible. |
| for { |
| visited[c] = true |
| if err := f(c); err == ErrStopRecursing { |
| return nil |
| } else if err != nil { |
| return err |
| } |
| if len(c.parents) == 0 { |
| return nil |
| } else if len(c.parents) == 1 { |
| p := c.parents[0] |
| if visited[p] { |
| return nil |
| } |
| c = p |
| } else { |
| break |
| } |
| } |
| if len(c.Parents) > 1 { |
| for _, p := range c.parents { |
| if visited[p] { |
| continue |
| } |
| if err := p.recurse(f, visited); err != nil { |
| return err |
| } |
| } |
| } |
| return nil |
| } |
| |
| // AllCommits returns the hashes of all commits reachable from this Commit, in |
| // reverse topological order. |
| func (c *Commit) AllCommits() ([]string, error) { |
| commits := make(map[*Commit]bool, c.repo.Len()) |
| if err := c.recurse(func(c *Commit) error { |
| return nil |
| }, commits); err != nil { |
| return nil, err |
| } |
| |
| // Topologically sort the commits. |
| sorted := topologicalSortHelper(commits) |
| return CommitSlice(sorted).Hashes(), nil |
| } |
| |
| // HasAncestor returns true iff the given commit is an ancestor of this commit. |
| func (c *Commit) HasAncestor(other string) bool { |
| found := false |
| if err := c.Recurse(func(commit *Commit) error { |
| if commit.Hash == other { |
| found = true |
| return ErrStopRecursing |
| } |
| return nil |
| }); err != nil { |
| // Our function doesn't return an error, so we shouldn't hit |
| // this case. |
| sklog.Errorf("Error in Commit.Recurse: %s", err) |
| } |
| return found |
| } |
| |
| // Less compares this Commit to the other Commit and returns true if it is |
| // considered "less" than the other. This is used as a helper function in |
| // sorting to ensure stability. Uses the timestamp as the primary sort function |
| // and the hash as a tie breaker. Note that, because we sort in reverse |
| // chronological order, this has the opposite meaning that it seems it should. |
| func (c *Commit) Less(other *Commit) bool { |
| if c.Timestamp.Equal(other.Timestamp) { |
| // If the timestamps are equal, just use the commit hash as an |
| // arbitrary way of comparing the two. |
| return c.Hash > other.Hash |
| } |
| return c.Timestamp.After(other.Timestamp) |
| } |
| |
| // Helpers for sorting. |
| type CommitSlice []*Commit |
| |
| func (s CommitSlice) Len() int { return len(s) } |
| func (s CommitSlice) Less(a, b int) bool { return s[a].Less(s[b]) } |
| func (s CommitSlice) Swap(a, b int) { s[a], s[b] = s[b], s[a] } |
| |
| // Hashes returns a slice of commit hashes corresponding to the commits in s. |
| func (s CommitSlice) Hashes() []string { |
| rv := make([]string, 0, len(s)) |
| for _, c := range s { |
| rv = append(rv, c.Hash) |
| } |
| return rv |
| } |
| |
| // Graph represents an entire Git repo. |
| type Graph struct { |
| branches []*git.Branch |
| commits map[string]int |
| commitsData []*Commit |
| graphMtx sync.RWMutex |
| repo *git.Repo |
| repoMtx sync.Mutex |
| } |
| |
| // gobGraph is a utility struct used for serializing a Graph using gob. |
| type gobGraph struct { |
| Commits map[string]int |
| CommitsData []*Commit |
| } |
| |
| // New returns a Graph instance which uses the given git.Graph. Obtains cached |
| // data but does NOT update the Graph; the caller is responsible for doing so |
| // before using the Graph if up-to-date data is required. |
| func New(repo *git.Repo) (*Graph, error) { |
| rv := &Graph{ |
| commits: map[string]int{}, |
| commitsData: []*Commit{}, |
| repo: repo, |
| } |
| cacheFile := path.Join(repo.Dir(), CACHE_FILE) |
| var r gobGraph |
| if err := util.MaybeReadGobFile(cacheFile, &r); err != nil { |
| return nil, fmt.Errorf("Failed to read Graph cache file %s: %s", cacheFile, err) |
| } |
| if r.Commits != nil { |
| rv.commits = r.Commits |
| } |
| if r.CommitsData != nil { |
| rv.commitsData = r.CommitsData |
| } |
| for _, c := range rv.commitsData { |
| c.repo = rv |
| for _, parentIdx := range c.ParentIndices { |
| c.parents = append(c.parents, rv.commitsData[parentIdx]) |
| } |
| } |
| return rv, nil |
| } |
| |
| // NewGraph returns a Graph instance, creating a git.Repo from the repoUrl and |
| // workdir. Obtains cached data but does NOT update the Graph; the caller is |
| // responsible for doing so before using the Graph if up-to-date data is |
| // required. |
| func NewGraph(ctx context.Context, repoUrl, workdir string) (*Graph, error) { |
| repo, err := git.NewRepo(ctx, repoUrl, workdir) |
| if err != nil { |
| return nil, fmt.Errorf("Failed to create git repo: %s", err) |
| } |
| return New(repo) |
| } |
| |
| // Len returns the number of commits in the repo. |
| func (r *Graph) Len() int { |
| r.graphMtx.RLock() |
| defer r.graphMtx.RUnlock() |
| return len(r.commitsData) |
| } |
| |
| // addCommit adds the commit with the given hash to the Graph. Assumes that the |
| // caller holds r.repoMtx and r.graphMtx. |
| func (r *Graph) addCommit(ctx context.Context, hash string) error { |
| d, err := r.repo.Details(ctx, hash) |
| if err != nil { |
| return fmt.Errorf("repograph.Graph: Failed to obtain Git commit details: %s", err) |
| } |
| |
| var parentIndices []int |
| var parents []*Commit |
| if len(d.Parents) > 0 { |
| for _, h := range d.Parents { |
| if h == "" { |
| continue |
| } |
| p, ok := r.commits[h] |
| if !ok { |
| return fmt.Errorf("repograph.Graph: Could not find parent commit %q", h) |
| } |
| parentIndices = append(parentIndices, p) |
| parents = append(parents, r.commitsData[p]) |
| } |
| } |
| |
| c := &Commit{ |
| LongCommit: d, |
| ParentIndices: parentIndices, |
| parents: parents, |
| repo: r, |
| } |
| r.commits[hash] = len(r.commitsData) |
| r.commitsData = append(r.commitsData, c) |
| return nil |
| } |
| |
| // Update syncs the local copy of the repo and loads new commits/branches into |
| // the Graph object. |
| func (r *Graph) Update(ctx context.Context) error { |
| _, err := r.update(ctx, false) |
| return err |
| } |
| |
| // UpdateAndReturnNewCommits syncs the local copy of the repo and loads new |
| // commits/branches into the Graph object. Returns a slice of any new commits, |
| // in no particular order. |
| func (r *Graph) UpdateAndReturnNewCommits(ctx context.Context) ([]*vcsinfo.LongCommit, error) { |
| return r.update(ctx, true) |
| } |
| |
| func (r *Graph) update(ctx context.Context, returnNewCommits bool) ([]*vcsinfo.LongCommit, error) { |
| r.repoMtx.Lock() |
| defer r.repoMtx.Unlock() |
| |
| // Update the local copy. |
| sklog.Infof("Updating repograph.Graph...") |
| if err := r.repo.Update(ctx); err != nil { |
| return nil, fmt.Errorf("Failed to update repograph.Graph: %s", err) |
| } |
| |
| // Obtain the list of branches. |
| sklog.Info(" Getting branches...") |
| branches, err := r.repo.Branches(ctx) |
| if err != nil { |
| return nil, fmt.Errorf("Failed to get branches for repograph.Graph: %s", err) |
| } |
| |
| // Load new commits from the repo. |
| r.graphMtx.Lock() |
| defer r.graphMtx.Unlock() |
| var newCommits []*vcsinfo.LongCommit |
| if returnNewCommits { |
| newCommits = []*vcsinfo.LongCommit{} |
| } |
| sklog.Infof(" Loading commits...") |
| for _, b := range branches { |
| // Shortcut: If we already have the head of this branch, don't |
| // bother loading commits. |
| if _, ok := r.commits[b.Head]; ok { |
| continue |
| } |
| |
| // Load all commits on this branch. |
| // First, try to load only new commits on this branch. |
| var commits []string |
| newBranch := true |
| for _, old := range r.branches { |
| if old.Name == b.Name { |
| anc, err := r.repo.IsAncestor(ctx, old.Head, b.Head) |
| if err != nil { |
| return nil, err |
| } |
| if anc { |
| commits, err = r.repo.RevList(ctx, "--topo-order", fmt.Sprintf("%s..%s", old.Head, b.Head)) |
| if err != nil { |
| return nil, err |
| } |
| newBranch = false |
| } |
| break |
| } |
| } |
| // If this is a new branch, or if the old branch head is not |
| // reachable from the new (eg. if commit history was modified), |
| // load ALL commits reachable from the branch head. |
| if newBranch { |
| sklog.Infof(" Branch %s is new or its history has changed; loading all commits.", b.Name) |
| commits, err = r.repo.RevList(ctx, "--topo-order", b.Head) |
| if err != nil { |
| return nil, fmt.Errorf("Failed to 'git rev-list' for repograph.Graph: %s", err) |
| } |
| } |
| for i := len(commits) - 1; i >= 0; i-- { |
| hash := commits[i] |
| if hash == "" { |
| continue |
| } |
| if _, ok := r.commits[hash]; ok { |
| continue |
| } |
| if err := r.addCommit(ctx, hash); err != nil { |
| return nil, err |
| } |
| if returnNewCommits { |
| newCommits = append(newCommits, r.commitsData[r.commits[hash]].LongCommit) |
| } |
| } |
| } |
| oldBranches := r.branches |
| r.branches = branches |
| |
| // Write to the cache file. |
| sklog.Infof(" Writing cache file...") |
| cacheFile := path.Join(r.repo.Dir(), CACHE_FILE) |
| if err := util.WithWriteFile(cacheFile, func(w io.Writer) error { |
| return gob.NewEncoder(w).Encode(gobGraph{ |
| Commits: r.commits, |
| CommitsData: r.commitsData, |
| }) |
| }); err != nil { |
| // If we fail to write the file but keep the new branch heads, |
| // we won't get consistent commit lists from update() on the |
| // next try vs after a server restart. |
| r.branches = oldBranches |
| return nil, err |
| } |
| sklog.Infof(" Done. Graph has %d commits.", len(r.commits)) |
| return newCommits, nil |
| } |
| |
| // Branches returns the list of known branches in the repo. |
| func (r *Graph) Branches() []string { |
| r.graphMtx.RLock() |
| defer r.graphMtx.RUnlock() |
| rv := make([]string, 0, len(r.branches)) |
| for _, b := range r.branches { |
| rv = append(rv, b.Name) |
| } |
| return rv |
| } |
| |
| // BranchHeads returns the set of branch heads from the repo. |
| func (r *Graph) BranchHeads() []*gitinfo.GitBranch { |
| branches := r.Branches() |
| branchHeads := make([]*gitinfo.GitBranch, 0, len(branches)) |
| for _, b := range branches { |
| branchHeads = append(branchHeads, &gitinfo.GitBranch{ |
| Name: b, |
| Head: r.Get(b).Hash, |
| }) |
| } |
| return branchHeads |
| } |
| |
| // Get returns a Commit object for the given ref, if such a commit exists. This |
| // function does not understand complex ref types (eg. HEAD~3); only branch |
| // names and full commit hashes are accepted. |
| func (r *Graph) Get(ref string) *Commit { |
| r.graphMtx.RLock() |
| defer r.graphMtx.RUnlock() |
| if c, ok := r.commits[ref]; ok { |
| return r.commitsData[c] |
| } |
| for _, b := range r.branches { |
| if ref == b.Name { |
| if c, ok := r.commits[b.Head]; ok { |
| return r.commitsData[c] |
| } |
| } |
| } |
| return nil |
| } |
| |
| // RecurseAllBranches runs the given function recursively over the entire commit |
| // history, starting at each of the known branch heads. The function accepts the |
| // current Commit as a parameter. Returning ErrStopRecursing from the function |
| // indicates that recursion should stop for the current branch, however, |
| // recursion will continue for any other branches until they are similarly |
| // terminated. Returning any other error causes recursion to stop without |
| // properly terminating other branches. The error will bubble to the top and be |
| // returned. Here's an example of printing out all of the commits in the repo: |
| // |
| // repo.RecurseAllBranches(func(c *Commit) (bool, error) { |
| // sklog.Info(c.Hash) |
| // return true, nil |
| // }) |
| func (r *Graph) RecurseAllBranches(f func(*Commit) error) error { |
| branches := r.Branches() |
| visited := make(map[*Commit]bool, r.Len()) |
| for _, b := range branches { |
| c := r.Get(b) |
| if c == nil { |
| return fmt.Errorf("Branch %s not found", b) |
| } |
| if _, ok := visited[c]; !ok { |
| if err := c.recurse(f, visited); err != nil { |
| return err |
| } |
| } |
| } |
| return nil |
| } |
| |
| // RevList is the equivalent of "git rev-list --topo-order from..to". |
| // Each argument is a commit hash or a branch name. The commits are returned |
| // in reverse topological order. Per "git rev-list" docs, the returned commit |
| // hashes consist of all commits reachable from "to" and not reachable from |
| // "from". In the typical case of linear git history, this just means the |
| // commits on the line after "from" up through "to", but with branches and |
| // merges it's possible that there are entire sub-graphs which are reachable |
| // from "to" but not "from". |
| func (r *Graph) RevList(from, to string) ([]string, error) { |
| fromCommit := r.Get(from) |
| if fromCommit == nil { |
| return nil, fmt.Errorf("Unknown commit %q", from) |
| } |
| toCommit := r.Get(to) |
| if toCommit == nil { |
| return nil, fmt.Errorf("Unknown commit %q", to) |
| } |
| // Shortcut: if the commits are the same, return now. |
| if fromCommit == toCommit { |
| return []string{}, nil |
| } |
| |
| // Find all of the excluded commits. |
| exclude := make(map[*Commit]bool, r.Len()) |
| if err := fromCommit.recurse(func(c *Commit) error { |
| return nil |
| }, exclude); err != nil { |
| return nil, err |
| } |
| |
| // Find the included commits. |
| include := make(map[*Commit]bool, r.Len()) |
| if err := toCommit.recurse(func(c *Commit) error { |
| if exclude[c] { |
| return ErrStopRecursing |
| } |
| return nil |
| }, include); err != nil { |
| return nil, err |
| } |
| |
| // include may contain some commits from the exclude map; remove them. |
| for c, _ := range include { |
| if exclude[c] { |
| delete(include, c) |
| } |
| } |
| |
| // Topologically sort the commits. |
| sorted := topologicalSortHelper(include) |
| return CommitSlice(sorted).Hashes(), nil |
| } |
| |
| // TopologicalSort returns a slice containing the given commits in reverse |
| // topological order, ie. every commit is listed before any of its parents. |
| func TopologicalSort(commits []*Commit) []*Commit { |
| commitsMap := make(map[*Commit]bool, len(commits)) |
| for _, c := range commits { |
| commitsMap[c] = true |
| } |
| return topologicalSortHelper(commitsMap) |
| } |
| |
| // Helper function for TopologicalSort; the caller provides the map, which is |
| // modified by topologicalSortHelper. |
| func topologicalSortHelper(commits map[*Commit]bool) []*Commit { |
| // children maps each commit to those commits which have it as a parent. |
| children := make(map[*Commit]map[*Commit]bool, len(commits)) |
| for c, _ := range commits { |
| for _, p := range c.parents { |
| if commits[p] { |
| subMap, ok := children[p] |
| if !ok { |
| subMap = map[*Commit]bool{} |
| children[p] = subMap |
| } |
| subMap[c] = true |
| } |
| } |
| } |
| |
| // Sort the commits topologically. |
| rv := make([]*Commit, 0, len(commits)) |
| followBranch := func(c *Commit) { |
| for len(children[c]) == 0 { |
| // Add this commit to rv. |
| rv = append(rv, c) |
| delete(commits, c) |
| |
| // Remove this commit from its parents' children, so |
| // that they can be processed. |
| for _, p := range c.parents { |
| if commits[p] { |
| delete(children[p], c) |
| } |
| } |
| |
| // Find a parent to process next. |
| var next *Commit |
| for _, p := range c.parents { |
| if commits[p] && len(children[p]) == 0 { |
| // We are ready to process this parent. |
| if next == nil || p.Less(next) { |
| next = p |
| } |
| } |
| } |
| if next != nil { |
| c = next |
| } else { |
| // None of this commit's parents are ready; we can't do |
| // any more. |
| return |
| } |
| } |
| } |
| for len(commits) > 0 { |
| var next *Commit |
| for commit, _ := range commits { |
| if len(children[commit]) == 0 { |
| // We are ready to process this commit. |
| if next == nil || commit.Less(next) { |
| next = commit |
| } |
| } |
| } |
| if next == nil { |
| sklog.Error("No commits are ready to process!") |
| // Return so that we don't loop forever. |
| return rv |
| } |
| followBranch(next) |
| } |
| return rv |
| } |
| |
| // IsAncestor returns true iff A is an ancestor of B, where A and B are either |
| // commit hashes or branch names. |
| func (r *Graph) IsAncestor(a, b string) (bool, error) { |
| aCommit := r.Get(a) |
| if aCommit == nil { |
| return false, fmt.Errorf("No such commit %q", a) |
| } |
| bCommit := r.Get(b) |
| if bCommit == nil { |
| return false, fmt.Errorf("No such commit %q", b) |
| } |
| return bCommit.HasAncestor(aCommit.Hash), nil |
| } |
| |
| // Return any commits at or after the given timestamp. |
| func (r *Graph) GetCommitsNewerThan(ts time.Time) ([]*vcsinfo.LongCommit, error) { |
| commits := []*Commit{} |
| if err := r.RecurseAllBranches(func(c *Commit) error { |
| if !c.Timestamp.Before(ts) { |
| commits = append(commits, c) |
| return nil |
| } |
| return ErrStopRecursing |
| }); err != nil { |
| return nil, err |
| } |
| |
| // Sort the commits by timestamp, most recent first. |
| sort.Sort(CommitSlice(commits)) |
| |
| rv := make([]*vcsinfo.LongCommit, 0, len(commits)) |
| for _, c := range commits { |
| rv = append(rv, c.LongCommit) |
| } |
| return rv, nil |
| } |
| |
| // GetLastNCommits returns the last N commits in the repo. |
| func (r *Graph) GetLastNCommits(n int) ([]*vcsinfo.LongCommit, error) { |
| // Find the last Nth commit on master, which we assume has far more |
| // commits than any other branch. |
| commit := r.Get("master") |
| for i := 0; i < n-1; i++ { |
| p := commit.GetParents() |
| if len(p) < 1 { |
| // Cut short if we've hit the beginning of history. |
| break |
| } |
| commit = p[0] |
| } |
| commits, err := r.GetCommitsNewerThan(commit.Timestamp) |
| if err != nil { |
| return nil, err |
| } |
| |
| // Return the most-recent N commits. |
| if n > len(commits) { |
| n = len(commits) |
| } |
| return commits[:n], nil |
| } |
| |
| // TempCheckout returns a git.TempCheckout of the underlying git.Repo. |
| func (r *Graph) TempCheckout(ctx context.Context) (*git.TempCheckout, error) { |
| r.repoMtx.Lock() |
| defer r.repoMtx.Unlock() |
| return r.repo.TempCheckout(ctx) |
| } |
| |
| // GetFile returns the contents of the given file at the given commit. |
| func (r *Graph) GetFile(ctx context.Context, fileName, commit string) (string, error) { |
| r.repoMtx.Lock() |
| defer r.repoMtx.Unlock() |
| return r.repo.GetFile(ctx, fileName, commit) |
| } |
| |
| // Map is a convenience type for dealing with multiple Graphs for different |
| // repos. The keys are repository URLs. |
| type Map map[string]*Graph |
| |
| // NewMap returns a Map instance with Graphs for the given repo URLs. Obtains |
| // cached data but does NOT update the Map; the caller is responsible for |
| // doing so before using the Map if up-to-date data is required. |
| func NewMap(ctx context.Context, repos []string, workdir string) (Map, error) { |
| rv := make(map[string]*Graph, len(repos)) |
| for _, r := range repos { |
| g, err := NewGraph(ctx, r, workdir) |
| if err != nil { |
| return nil, err |
| } |
| rv[r] = g |
| } |
| return rv, nil |
| } |
| |
| // Update updates all Graphs in the Map. |
| func (m Map) Update(ctx context.Context) error { |
| _, err := m.update(ctx, false) |
| return err |
| } |
| |
| // UpdateAndReturnNewCommits updates all Graphs in the Map. Returns a map of |
| // repo URLs to slices of new commits in each of the repos. |
| func (m Map) UpdateAndReturnNewCommits(ctx context.Context) (map[string][]*vcsinfo.LongCommit, error) { |
| return m.update(ctx, true) |
| } |
| |
| // Update updates all Graphs in the Map. Returns a map of repo URLs to slices of |
| // new commits in each of the repos. |
| func (m Map) update(ctx context.Context, returnNewCommits bool) (map[string][]*vcsinfo.LongCommit, error) { |
| newCommits := make(map[string][]*vcsinfo.LongCommit, len(m)) |
| for repoUrl, g := range m { |
| commits, err := g.update(ctx, returnNewCommits) |
| if err != nil { |
| return nil, err |
| } |
| newCommits[repoUrl] = commits |
| } |
| return newCommits, nil |
| } |
| |
| // FindCommit returns the Commit object, repo URL, and Graph object for the |
| // given commit hash if it exists in any of the Graphs in the Map and an error |
| // otherwise. |
| func (m Map) FindCommit(commit string) (*Commit, string, *Graph, error) { |
| for name, repo := range m { |
| c := repo.Get(commit) |
| if c != nil { |
| return c, name, repo, nil |
| } |
| } |
| return nil, "", nil, fmt.Errorf("Unable to find commit %s in any repo.", commit) |
| } |
| |
| // RepoURLs returns the list of repositories in the Map. |
| func (m Map) RepoURLs() []string { |
| rv := make([]string, 0, len(m)) |
| for r := range m { |
| rv = append(rv, r) |
| } |
| return rv |
| } |