[repograph] Remove commitsData

It's no longer needed, now that we don't track parent indices

Bug: skia:
Change-Id: I3d45ff836671e865d7bf864236c982726d258c28
Reviewed-on: https://skia-review.googlesource.com/c/buildbot/+/201884
Commit-Queue: Eric Boren <borenet@google.com>
Reviewed-by: Ben Wagner <benjaminwagner@google.com>
diff --git a/go/git/repograph/gitstore.go b/go/git/repograph/gitstore.go
index b35a7ca..2f6a60a 100644
--- a/go/git/repograph/gitstore.go
+++ b/go/git/repograph/gitstore.go
@@ -146,34 +146,21 @@
 	}
 	if needOrphanCheck {
 		sklog.Warningf("History change detected; checking for orphaned commits.")
-		visited := make(map[*Commit]bool, len(graph.commitsData))
+		visited := make(map[*Commit]bool, len(graph.commits))
 		for _, newBranchHead := range newBranchesMap {
 			// Not using Get() because graphMtx is locked.
-			if err := graph.commitsData[graph.commits[newBranchHead]].recurse(func(c *Commit) error {
+			if err := graph.commits[newBranchHead].recurse(func(c *Commit) error {
 				return nil
 			}, visited); err != nil {
 				return nil, err
 			}
 		}
-		orphaned := map[*Commit]bool{}
-		for _, c := range graph.commitsData {
+		for hash, c := range graph.commits {
 			if !visited[c] {
-				orphaned[c] = true
+				sklog.Warningf("Commit %s is orphaned. Removing from the Graph.", hash)
+				delete(graph.commits, hash)
 			}
 		}
-		if len(orphaned) > 0 {
-			sklog.Errorf("%d commits are now orphaned. Removing them from the Graph.", len(orphaned))
-			newCommitsData := make([]*Commit, 0, len(graph.commitsData)-len(orphaned))
-			newCommitsMap := make(map[string]int, len(graph.commitsData)-len(orphaned))
-			for _, c := range graph.commitsData {
-				if !orphaned[c] {
-					newCommitsMap[c.Hash] = len(newCommitsData)
-					newCommitsData = append(newCommitsData, c)
-				}
-			}
-			graph.commits = newCommitsMap
-			graph.commitsData = newCommitsData
-		}
 	}
 
 	// Update the rest of the Graph.
@@ -203,7 +190,6 @@
 	defer r.graphMtx.Unlock()
 	r.branches = newGraph.branches
 	r.commits = newGraph.commits
-	r.commitsData = newGraph.commitsData
 	r.gitstoreLastUpdate = newGraph.gitstoreLastUpdate
 	sklog.Infof("  Done. Graph has %d commits.", len(r.commits))
 	return newCommits, nil
diff --git a/go/git/repograph/graph.go b/go/git/repograph/graph.go
index dfe4bc2..8cfeddd 100644
--- a/go/git/repograph/graph.go
+++ b/go/git/repograph/graph.go
@@ -10,6 +10,7 @@
 	"errors"
 	"fmt"
 	"io"
+	"os"
 	"path"
 	"sort"
 	"sync"
@@ -37,11 +38,11 @@
 	*vcsinfo.LongCommit
 	parents []*Commit
 
-	// repoLen is the number of known commits at the time that this Commit
-	// was added to the graph. It is only used during Recurse as a rough
-	// estimate of the maximum number of commits which could be visited, to
-	// prevent excessive resizing of the visited map.
-	repoLen int
+	// HistoryLen is the number of commits in the longest line of history
+	// reachable from this Commit. It is only used during Recurse as an
+	// estimate of the number of commits which might be visited, to prevent
+	// excessive resizing of the visited map.
+	HistoryLen int
 }
 
 // Parents returns the parents of this commit.
@@ -63,7 +64,7 @@
 // 	return nil
 // })
 func (c *Commit) Recurse(f func(*Commit) error) error {
-	return c.recurse(f, make(map[*Commit]bool, c.repoLen))
+	return c.recurse(f, make(map[*Commit]bool, c.HistoryLen))
 }
 
 // recurse is a helper function used by Recurse.
@@ -106,7 +107,7 @@
 // AllCommits returns the hashes of all commits reachable from this Commit, in
 // reverse topological order.
 func (c *Commit) AllCommits() ([]string, error) {
-	commits := make(map[*Commit]bool, c.repoLen)
+	commits := make(map[*Commit]bool, c.HistoryLen)
 	if err := c.recurse(func(c *Commit) error {
 		return nil
 	}, commits); err != nil {
@@ -167,10 +168,9 @@
 
 // Graph represents an entire Git repo.
 type Graph struct {
-	branches    []*git.Branch
-	commits     map[string]int
-	commitsData []*Commit
-	graphMtx    sync.RWMutex
+	branches []*git.Branch
+	commits  map[string]*Commit
+	graphMtx sync.RWMutex
 
 	// repo is only set if NewLocalGraph() is used.
 	repo    *git.Repo
@@ -184,9 +184,8 @@
 
 // gobGraph is a utility struct used for serializing a Graph using gob.
 type gobGraph struct {
-	Branches    []*git.Branch
-	Commits     map[string]int
-	CommitsData []*Commit
+	Branches []*git.Branch
+	Commits  map[string]*Commit
 }
 
 // NewLocalGraph returns a Graph instance, creating a git.Repo from the repoUrl
@@ -199,14 +198,16 @@
 		return nil, fmt.Errorf("Failed to create git repo: %s", err)
 	}
 	rv := &Graph{
-		commits:     map[string]int{},
-		commitsData: []*Commit{},
-		repo:        repo,
+		commits: map[string]*Commit{},
+		repo:    repo,
 	}
 	cacheFile := path.Join(repo.Dir(), CACHE_FILE)
 	var r gobGraph
 	if err := util.MaybeReadGobFile(cacheFile, &r); err != nil {
-		return nil, fmt.Errorf("Failed to read Graph cache file %s: %s", cacheFile, err)
+		sklog.Errorf("Failed to read Graph cache file %s; deleting the file and starting from scratch: %s", cacheFile, err)
+		if err2 := os.Remove(cacheFile); err != nil {
+			return nil, fmt.Errorf("Failed to read Graph cache file %s: %s\n...and failed to remove with: %s", cacheFile, err, err2)
+		}
 	}
 	if r.Branches != nil {
 		rv.branches = r.Branches
@@ -214,13 +215,9 @@
 	if r.Commits != nil {
 		rv.commits = r.Commits
 	}
-	if r.CommitsData != nil {
-		rv.commitsData = r.CommitsData
-	}
-	for idx, c := range rv.commitsData {
-		c.repoLen = idx + 1
+	for _, c := range rv.commits {
 		for _, parentHash := range c.Parents {
-			c.parents = append(c.parents, rv.commitsData[rv.commits[parentHash]])
+			c.parents = append(c.parents, rv.commits[parentHash])
 		}
 	}
 	return rv, nil
@@ -229,9 +226,8 @@
 // NewGitStoreGraph returns a Graph instance which is backed by a GitStore.
 func NewGitStoreGraph(ctx context.Context, gs gitstore.GitStore) (*Graph, error) {
 	rv := &Graph{
-		commits:     map[string]int{},
-		commitsData: []*Commit{},
-		gitstore:    gs,
+		commits:  map[string]*Commit{},
+		gitstore: gs,
 	}
 	if err := rv.Update(ctx); err != nil {
 		return nil, err
@@ -243,12 +239,13 @@
 func (r *Graph) Len() int {
 	r.graphMtx.RLock()
 	defer r.graphMtx.RUnlock()
-	return len(r.commitsData)
+	return len(r.commits)
 }
 
 // addCommit adds the commit with the given hash to the Graph. Assumes that the
 // caller holds r.graphMtx.
 func (r *Graph) addCommit(d *vcsinfo.LongCommit) error {
+	maxParentHistoryLen := 0
 	var parents []*Commit
 	if len(d.Parents) > 0 {
 		for _, h := range d.Parents {
@@ -259,17 +256,19 @@
 			if !ok {
 				return fmt.Errorf("repograph.Graph: Could not find parent commit %q", h)
 			}
-			parents = append(parents, r.commitsData[p])
+			parents = append(parents, p)
+			if p.HistoryLen > maxParentHistoryLen {
+				maxParentHistoryLen = p.HistoryLen
+			}
 		}
 	}
 
 	c := &Commit{
 		LongCommit: d,
 		parents:    parents,
-		repoLen:    len(r.commitsData) + 1,
+		HistoryLen: maxParentHistoryLen + 1,
 	}
-	r.commits[d.Hash] = len(r.commitsData)
-	r.commitsData = append(r.commitsData, c)
+	r.commits[c.Hash] = c
 	return nil
 }
 
@@ -393,34 +392,21 @@
 	}
 	if needOrphanCheck {
 		sklog.Warningf("History change detected; checking for orphaned commits.")
-		visited := make(map[*Commit]bool, len(graph.commitsData))
+		visited := make(map[*Commit]bool, len(graph.commits))
 		for _, newBranchHead := range newBranchesMap {
 			// Not using Get() because graphMtx is locked.
-			if err := graph.commitsData[graph.commits[newBranchHead]].recurse(func(c *Commit) error {
+			if err := graph.commits[newBranchHead].recurse(func(c *Commit) error {
 				return nil
 			}, visited); err != nil {
 				return nil, err
 			}
 		}
-		orphaned := map[*Commit]bool{}
-		for _, c := range graph.commitsData {
+		for hash, c := range graph.commits {
 			if !visited[c] {
-				orphaned[c] = true
+				sklog.Warningf("Commit %s is orphaned. Removing from the Graph.", hash)
+				delete(graph.commits, hash)
 			}
 		}
-		if len(orphaned) > 0 {
-			sklog.Errorf("%d commits are now orphaned. Removing them from the Graph.", len(orphaned))
-			newCommitsData := make([]*Commit, 0, len(graph.commitsData)-len(orphaned))
-			newCommitsMap := make(map[string]int, len(graph.commitsData)-len(orphaned))
-			for _, c := range graph.commitsData {
-				if !orphaned[c] {
-					newCommitsMap[c.Hash] = len(newCommitsData)
-					newCommitsData = append(newCommitsData, c)
-				}
-			}
-			graph.commits = newCommitsMap
-			graph.commitsData = newCommitsData
-		}
 	}
 
 	graph.branches = newBranchesList
@@ -433,9 +419,8 @@
 	cacheFile := path.Join(repo.Dir(), CACHE_FILE)
 	return util.WithWriteFile(cacheFile, func(w io.Writer) error {
 		return gob.NewEncoder(w).Encode(gobGraph{
-			Branches:    r.branches,
-			Commits:     r.commits,
-			CommitsData: r.commitsData,
+			Branches: r.branches,
+			Commits:  r.commits,
 		})
 	})
 }
@@ -454,7 +439,6 @@
 	defer r.graphMtx.Unlock()
 	r.branches = newGraph.branches
 	r.commits = newGraph.commits
-	r.commitsData = newGraph.commitsData
 	sklog.Infof("  Done. Graph has %d commits.", len(r.commits))
 	return newCommits, nil
 }
@@ -488,12 +472,10 @@
 func (r *Graph) ShallowCopy() *Graph {
 	r.graphMtx.RLock()
 	defer r.graphMtx.RUnlock()
-	newCommits := make(map[string]int, len(r.commits))
+	newCommits := make(map[string]*Commit, len(r.commits))
 	for k, v := range r.commits {
 		newCommits[k] = v
 	}
-	newCommitsData := make([]*Commit, len(r.commitsData))
-	copy(newCommitsData, r.commitsData)
 	newBranches := make([]*git.Branch, 0, len(r.branches))
 	for _, branch := range r.branches {
 		newBranches = append(newBranches, &git.Branch{
@@ -502,9 +484,8 @@
 		})
 	}
 	return &Graph{
-		branches:    newBranches,
-		commits:     newCommits,
-		commitsData: newCommitsData,
+		branches: newBranches,
+		commits:  newCommits,
 	}
 }
 
@@ -515,12 +496,12 @@
 	r.graphMtx.RLock()
 	defer r.graphMtx.RUnlock()
 	if c, ok := r.commits[ref]; ok {
-		return r.commitsData[c]
+		return c
 	}
 	for _, b := range r.branches {
 		if ref == b.Name {
 			if c, ok := r.commits[b.Head]; ok {
-				return r.commitsData[c]
+				return c
 			}
 		}
 	}
diff --git a/go/git/repograph/graph_test.go b/go/git/repograph/graph_test.go
index 14d6561..38e9d5b 100644
--- a/go/git/repograph/graph_test.go
+++ b/go/git/repograph/graph_test.go
@@ -720,11 +720,15 @@
 		assertTopoSorted(t, sorted)
 	}
 	checkGraph := func(g *Graph) {
-		sets := util.PowerSet(len(g.commitsData))
+		commitsList := make([]*Commit, 0, len(g.commits))
+		for _, c := range g.commits {
+			commitsList = append(commitsList, c)
+		}
+		sets := util.PowerSet(len(commitsList))
 		for _, set := range sets {
-			inp := make([]*Commit, 0, len(g.commitsData))
+			inp := make([]*Commit, 0, len(commitsList))
 			for _, idx := range set {
-				inp = append(inp, g.commitsData[idx])
+				inp = append(inp, commitsList[idx])
 			}
 			check(inp)
 		}
@@ -741,7 +745,11 @@
 		// Test stability by verifying that we get the expected ordering
 		// for the entire repo, multiple times.
 		for i := 0; i < 10; i++ {
-			sorted := CommitSlice(TopologicalSort(g.commitsData)).Hashes()
+			commitsList := make([]*Commit, 0, len(g.commits))
+			for _, c := range g.commits {
+				commitsList = append(commitsList, c)
+			}
+			sorted := CommitSlice(TopologicalSort(commitsList)).Hashes()
 			deepequal.AssertDeepEqual(t, expect, sorted)
 		}
 	}