| // Package gitinfo enables querying info from Git repository using git and a local checkout. |
| package gitinfo |
| |
| import ( |
| "context" |
| "fmt" |
| "os" |
| "path" |
| "regexp" |
| "sort" |
| "strconv" |
| "strings" |
| "sync" |
| "time" |
| |
| "go.skia.org/infra/go/exec" |
| "go.skia.org/infra/go/git" |
| "go.skia.org/infra/go/git/git_common" |
| "go.skia.org/infra/go/skerr" |
| "go.skia.org/infra/go/sklog" |
| "go.skia.org/infra/go/vcsinfo" |
| ) |
| |
// commitLineRe parses a single "<hash>,<author>,<subject>" line of commit log
// output. Capture groups: 1 is the 40-character lowercase hex hash, 2 is the
// author (which may not contain a comma) and 3 is the subject, i.e. the rest
// of the line. The pattern is not anchored at the start of the line.
var (
	commitLineRe = regexp.MustCompile(`([0-9a-f]{40}),([^,\n]+),(.+)$`)
)
| |
| // GitInfo allows querying a Git repo. |
| type GitInfo struct { |
| dir git.Checkout |
| hashes []string |
| timestamps map[string]time.Time // The git hash is the key. |
| detailsCache map[string]*vcsinfo.LongCommit // The git hash is the key. |
| firstCommit string |
| |
| // Any access to hashes or timestamps must be protected. |
| mutex sync.Mutex |
| } |
| |
| // GetBranch implements the vcsinfo.VCS interface. |
| func (g *GitInfo) GetBranch() string { |
| // This was added later and we have implicitly assumed that we are tracking |
| // the main branch. |
| return git.MasterBranch |
| } |
| |
| // NewGitInfo creates a new GitInfo for the Git repository found in directory |
| // dir. If pull is true then a git pull is done on the repo before querying it |
| // for history. |
| func NewGitInfo(ctx context.Context, dir string, pull, allBranches bool) (*GitInfo, error) { |
| g := &GitInfo{ |
| dir: git.CheckoutDir(dir), |
| hashes: []string{}, |
| detailsCache: map[string]*vcsinfo.LongCommit{}, |
| } |
| return g, g.Update(ctx, pull, allBranches) |
| } |
| |
| // Clone creates a new GitInfo by running "git clone" in the given directory. |
| func Clone(ctx context.Context, repoUrl, dir string, allBranches bool) (*GitInfo, error) { |
| gitPath, _, _, err := git_common.FindGit(ctx) |
| if err != nil { |
| return nil, skerr.Wrap(err) |
| } |
| if _, err := exec.RunSimple(ctx, fmt.Sprintf("%s clone %s %s", gitPath, repoUrl, dir)); err != nil { |
| return nil, fmt.Errorf("Failed to clone %s into %s: %s", repoUrl, dir, err) |
| } |
| return NewGitInfo(ctx, dir, false, allBranches) |
| } |
| |
| // CloneOrUpdate creates a new GitInfo by running "git clone" or "git pull" |
| // depending on whether the repo already exists. |
| func CloneOrUpdate(ctx context.Context, repoUrl, dir string, allBranches bool) (*GitInfo, error) { |
| gitDir := path.Join(dir, ".git") |
| _, err := os.Stat(gitDir) |
| if err == nil { |
| return NewGitInfo(ctx, dir, true, allBranches) |
| } |
| if os.IsNotExist(err) { |
| return Clone(ctx, repoUrl, dir, allBranches) |
| } |
| return nil, err |
| } |
| |
| // Update refreshes the history that GitInfo stores for the repo. If pull is |
| // true then git pull is performed before refreshing. |
| func (g *GitInfo) Update(ctx context.Context, pull, allBranches bool) error { |
| g.mutex.Lock() |
| defer g.mutex.Unlock() |
| |
| sklog.Info("Beginning Update.") |
| if pull { |
| if _, err := g.dir.Git(ctx, "pull"); err != nil { |
| return fmt.Errorf("Failed to sync to HEAD: %s", err) |
| } |
| } |
| sklog.Info("Finished pull.") |
| var hashes []string |
| var timestamps map[string]time.Time |
| var err error |
| if allBranches { |
| hashes, timestamps, err = readCommitsFromGitAllBranches(ctx, g.dir) |
| } else { |
| hashes, timestamps, err = readCommitsFromGit(ctx, g.dir, "HEAD") |
| } |
| sklog.Infof("Finished reading commits: %s", g.dir) |
| if err != nil { |
| return fmt.Errorf("Failed to read commits from: %s : %s", g.dir, err) |
| } |
| g.hashes = hashes |
| g.timestamps = timestamps |
| g.firstCommit, err = g.InitialCommit(ctx) |
| if err != nil { |
| return fmt.Errorf("Failed to get initial commit: %s", err) |
| } |
| return nil |
| } |
| |
| // Dir returns the checkout dir of the GitInfo.. |
| func (g *GitInfo) Dir() string { |
| return g.dir.Dir() |
| } |
| |
| // Details returns more information than ShortCommit about a given commit. |
| // See the vcsinfo.VCS interface for details. |
| func (g *GitInfo) Details(ctx context.Context, hash string, includeBranchInfo bool) (*vcsinfo.LongCommit, error) { |
| g.mutex.Lock() |
| defer g.mutex.Unlock() |
| return g.details(ctx, hash, includeBranchInfo) |
| } |
| |
| // See the vcsinfo.VCS interface for details. |
| func (g *GitInfo) DetailsMulti(ctx context.Context, hashes []string, includeBranchInfo bool) ([]*vcsinfo.LongCommit, error) { |
| g.mutex.Lock() |
| defer g.mutex.Unlock() |
| ret := make([]*vcsinfo.LongCommit, len(hashes)) |
| for idx, hash := range hashes { |
| var err error |
| if ret[idx], err = g.details(ctx, hash, includeBranchInfo); err != nil { |
| return nil, err |
| } |
| } |
| return ret, nil |
| } |
| |
| // details returns more information than ShortCommit about a given commit. |
| // See the vcsinfo.VCS interface for details. |
| // |
| // Caller is responsible for locking the mutex. |
| func (g *GitInfo) details(ctx context.Context, hash string, includeBranchInfo bool) (*vcsinfo.LongCommit, error) { |
| if c, ok := g.detailsCache[hash]; ok { |
| // Return the cached value if the branchInfo request matches. |
| if !includeBranchInfo || (len(c.Branches) > 0) { |
| return c, nil |
| } |
| } |
| output, err := g.dir.Git(ctx, "log", "-n", "1", "--format=format:%H%n%P%n%an%x20(%ae)%n%s%n%b", hash) |
| if err != nil { |
| return nil, fmt.Errorf("Failed to execute Git: %s", err) |
| } |
| lines := strings.SplitN(output, "\n", 5) |
| if len(lines) != 5 { |
| return nil, fmt.Errorf("Failed to parse output of 'git log'.") |
| } |
| branches := map[string]bool{} |
| if includeBranchInfo { |
| branches, err = g.getBranchesForCommit(ctx, hash) |
| if err != nil { |
| return nil, err |
| } |
| } |
| |
| var parents []string |
| if lines[1] != "" { |
| parents = strings.Split(lines[1], " ") |
| } |
| c := vcsinfo.LongCommit{ |
| ShortCommit: &vcsinfo.ShortCommit{ |
| Hash: lines[0], |
| Author: lines[2], |
| Subject: lines[3], |
| }, |
| Parents: parents, |
| Body: lines[4], |
| Timestamp: g.timestamps[hash], |
| Branches: branches, |
| } |
| g.detailsCache[hash] = &c |
| return &c, nil |
| } |
| |
| func (g *GitInfo) Reset(ctx context.Context, ref string) error { |
| _, err := g.dir.Git(ctx, "reset", "--hard", ref) |
| if err != nil { |
| return fmt.Errorf("Failed to roll back/forward to commit %s: %s", ref, err) |
| } |
| return nil |
| } |
| |
| func (g *GitInfo) Checkout(ctx context.Context, ref string) error { |
| if _, err := g.dir.Git(ctx, "checkout", ref); err != nil { |
| return fmt.Errorf("Failed to checkout %s: %s", ref, err) |
| } |
| return nil |
| } |
| |
| // getBranchesForCommit returns a string set with all the branches that can reach |
| // the commit with the given hash. |
| // TODO(stephana): Speed up this method, there are either better ways to do this |
| // in git or the results can be cached. |
| func (g *GitInfo) getBranchesForCommit(ctx context.Context, hash string) (map[string]bool, error) { |
| output, err := g.dir.Git(ctx, "branch", "--all", "--list", "--contains", hash) |
| if err != nil { |
| return nil, fmt.Errorf("Failed to get branches for commit %s: %s", hash, err) |
| } |
| |
| lines := strings.Split(strings.TrimSpace(output), "\n") |
| ret := map[string]bool{} |
| for _, line := range lines { |
| l := strings.TrimSpace(line) |
| if l != "" { |
| // Splitting the line to filter out the '*' that marks the active branch. |
| parts := strings.Split(l, " ") |
| ret[parts[len(parts)-1]] = true |
| } |
| } |
| return ret, nil |
| } |
| |
| // RevList returns the results of "git rev-list". |
| func (g *GitInfo) RevList(ctx context.Context, args ...string) ([]string, error) { |
| g.mutex.Lock() |
| defer g.mutex.Unlock() |
| output, err := g.dir.Git(ctx, append([]string{"rev-list"}, args...)...) |
| if err != nil { |
| return nil, fmt.Errorf("git rev-list failed: %v", err) |
| } |
| res := strings.Trim(output, "\n") |
| if res == "" { |
| return []string{}, nil |
| } |
| return strings.Split(res, "\n"), nil |
| } |
| |
| // From returns all commits from 'start' to HEAD. |
| func (g *GitInfo) From(start time.Time) []string { |
| g.mutex.Lock() |
| defer g.mutex.Unlock() |
| ret := []string{} |
| for _, h := range g.hashes { |
| if g.timestamps[h].After(start) { |
| ret = append(ret, h) |
| } |
| } |
| return ret |
| } |
| |
| // Range returns all commits from the half open interval ['begin', 'end'), i.e. |
| // includes 'begin' and excludes 'end'. |
| func (g *GitInfo) Range(begin, end time.Time) []*vcsinfo.IndexCommit { |
| g.mutex.Lock() |
| defer g.mutex.Unlock() |
| ret := []*vcsinfo.IndexCommit{} |
| first := sort.Search(len(g.hashes), func(i int) bool { |
| ts := g.timestamps[g.hashes[i]] |
| return ts.After(begin) || ts.Equal(begin) |
| }) |
| if first == len(g.timestamps) { |
| return ret |
| } |
| for i, h := range g.hashes[first:] { |
| if g.timestamps[h].Before(end) { |
| ret = append(ret, &vcsinfo.IndexCommit{ |
| Hash: h, |
| Index: first + i, |
| Timestamp: g.timestamps[h], |
| }) |
| } else { |
| break |
| } |
| } |
| return ret |
| } |
| |
| // LastNIndex returns the last N commits. |
| func (g *GitInfo) LastNIndex(N int) []*vcsinfo.IndexCommit { |
| g.mutex.Lock() |
| defer g.mutex.Unlock() |
| var hashes []string |
| offset := 0 |
| if len(g.hashes) < N { |
| hashes = g.hashes |
| } else { |
| hashes = g.hashes[len(g.hashes)-N:] |
| offset = len(g.hashes) - N |
| } |
| ret := []*vcsinfo.IndexCommit{} |
| for i, h := range hashes { |
| ret = append(ret, &vcsinfo.IndexCommit{ |
| Hash: h, |
| Index: i + offset, |
| Timestamp: g.timestamps[h], |
| }) |
| } |
| return ret |
| } |
| |
| // IndexOf returns the index of given hash as counted from the first commit in |
| // this branch by 'rev-list'. The index is 0 based. |
| func (g *GitInfo) IndexOf(ctx context.Context, hash string) (int, error) { |
| // Count the lines from running: |
| // git rev-list --count <first-commit>..hash. |
| output, err := g.RevList(ctx, "--count", git.LogFromTo(g.firstCommit, hash)) |
| if err != nil { |
| return 0, fmt.Errorf("git rev-list failed: %s", err) |
| } |
| if len(output) != 1 { |
| return 0, fmt.Errorf("git rev-list wrong size output: %s", err) |
| } |
| n, err := strconv.Atoi(output[0]) |
| if err != nil { |
| return 0, fmt.Errorf("Didn't get a number: %s", err) |
| } |
| return n, nil |
| } |
| |
| // ByIndex returns a LongCommit describing the commit |
| // at position N, as ordered in the current branch. |
| // |
| // Does not make sense if readCommitsFromGitAllBranches has been |
| // called. |
| func (g *GitInfo) ByIndex(ctx context.Context, N int) (*vcsinfo.LongCommit, error) { |
| g.mutex.Lock() |
| defer g.mutex.Unlock() |
| numHashes := len(g.hashes) |
| if N < 0 || N >= numHashes { |
| return nil, fmt.Errorf("Hash index not found: %d", N) |
| } |
| return g.details(ctx, g.hashes[N], false) |
| } |
| |
| // LastN returns the last N commits. |
| func (g *GitInfo) LastN(ctx context.Context, N int) []string { |
| g.mutex.Lock() |
| defer g.mutex.Unlock() |
| if len(g.hashes) < N { |
| return g.hashes[0:len(g.hashes)] |
| } else { |
| return g.hashes[len(g.hashes)-N:] |
| } |
| } |
| |
| // This is a temporary performance enhancement for Perf. |
| // It will be removed once Perf moves to gitstore. |
| func (g *GitInfo) TimestampAtIndex(N int) (time.Time, error) { |
| if N < 0 || N >= len(g.hashes) { |
| return time.Time{}, fmt.Errorf("Hash index not found: %d", N) |
| } |
| g.mutex.Lock() |
| defer g.mutex.Unlock() |
| return g.timestamps[g.hashes[N]], nil |
| } |
| |
| // Timestamp returns the timestamp for the given hash. |
| func (g *GitInfo) Timestamp(hash string) time.Time { |
| g.mutex.Lock() |
| defer g.mutex.Unlock() |
| return g.timestamps[hash] |
| } |
| |
| // Log returns a --name-only short log for every commit in (begin, end]. |
| // |
| // If end is "" then it returns just the short log for the single commit at |
| // begin. |
| // |
| // Example response: |
| // |
| // commit b7988a21fdf23cc4ace6145a06ea824aa85db099 |
| // Author: Joe Gregorio <jcgregorio@google.com> |
| // Date: Tue Aug 5 16:19:48 2014 -0400 |
| // |
| // A description of the commit. |
| // |
| // perf/go/skiaperf/perf.go |
| // perf/go/types/types.go |
| // perf/res/js/logic.js |
| func (g *GitInfo) Log(ctx context.Context, begin, end string) (string, error) { |
| command := []string{"log", "--name-only"} |
| hashrange := begin |
| if end != "" { |
| hashrange += ".." + end |
| command = append(command, hashrange) |
| } else { |
| command = append(command, "-n", "1", hashrange) |
| } |
| output, err := g.dir.Git(ctx, command...) |
| if err != nil { |
| return "", err |
| } |
| return output, nil |
| } |
| |
| // LogFine is the same as Log() but appends all the 'args' to the Log |
| // request to allow finer control of the log output. I.e. you could call: |
| // |
| // LogFine(begin, end, "--format=format:%ct", "infra/bots/assets/skp/VERSION") |
| |
| func (g *GitInfo) LogFine(ctx context.Context, begin, end string, args ...string) (string, error) { |
| command := []string{"log"} |
| hashrange := begin |
| if end != "" { |
| hashrange += ".." + end |
| command = append(command, hashrange) |
| } else { |
| command = append(command, "-n", "1", hashrange) |
| } |
| command = append(command, args...) |
| output, err := g.dir.Git(ctx, command...) |
| if err != nil { |
| return "", err |
| } |
| return output, nil |
| } |
| |
| // LogArgs is the same as Log() but appends all the 'args' to the Log |
| // request to allow finer control of the log output. I.e. you could call: |
| // |
| // LogArgs("--since=2015-10-24", "--format=format:%ct", "infra/bots/assets/skp/VERSION") |
| func (g *GitInfo) LogArgs(ctx context.Context, args ...string) (string, error) { |
| command := []string{"log"} |
| command = append(command, args...) |
| output, err := g.dir.Git(ctx, command...) |
| if err != nil { |
| return "", err |
| } |
| return output, nil |
| } |
| |
| // FullHash gives the full commit hash for the given ref. |
| func (g *GitInfo) FullHash(ctx context.Context, ref string) (string, error) { |
| output, err := g.dir.Git(ctx, "rev-parse", fmt.Sprintf("%s^{commit}", ref)) |
| if err != nil { |
| return "", fmt.Errorf("Failed to obtain full hash: %s", err) |
| } |
| return strings.Trim(output, "\n"), nil |
| } |
| |
| // InitialCommit returns the hash of the initial commit. |
| func (g *GitInfo) InitialCommit(ctx context.Context) (string, error) { |
| output, err := g.dir.Git(ctx, "rev-list", "--max-parents=0", "--first-parent", "HEAD") |
| if err != nil { |
| return "", fmt.Errorf("Failed to determine initial commit: %v", err) |
| } |
| return strings.Trim(output, "\n"), nil |
| } |
| |
| // GetBranches returns a slice of strings naming the branches in the repo. |
| func (g *GitInfo) GetBranches(ctx context.Context) ([]*GitBranch, error) { |
| return GetBranches(ctx, g.dir) |
| } |
| |
| // ShortCommits stores a slice of ShortCommit struct. |
| type ShortCommits struct { |
| Commits []*vcsinfo.ShortCommit |
| } |
| |
| // ShortList returns a slice of ShortCommit for every commit in (begin, end]. |
| func (g *GitInfo) ShortList(ctx context.Context, begin, end string) (*ShortCommits, error) { |
| command := []string{"log", "--pretty='%H,%an,%s", begin + ".." + end} |
| output, err := g.dir.Git(ctx, command...) |
| if err != nil { |
| return nil, err |
| } |
| ret := &ShortCommits{ |
| Commits: []*vcsinfo.ShortCommit{}, |
| } |
| for _, line := range strings.Split(output, "\n") { |
| match := commitLineRe.FindStringSubmatch(line) |
| if match == nil { |
| // This could happen if the subject has new line, in which case we truncate it and ignore the remainder. |
| continue |
| } |
| commit := &vcsinfo.ShortCommit{ |
| Hash: match[1], |
| Author: match[2], |
| Subject: match[3], |
| } |
| ret.Commits = append(ret.Commits, commit) |
| } |
| |
| return ret, nil |
| } |
| |
// gitHash pairs the hash of a single Git commit with its timestamp.
type gitHash struct {
	hash      string
	timeStamp time.Time
}

// gitHashSlice implements sort.Interface, ordering commits by ascending
// timestamp.
type gitHashSlice []*gitHash

// Len reports the number of commits in the slice.
func (p gitHashSlice) Len() int {
	return len(p)
}

// Less reports whether commit i is strictly older than commit j.
func (p gitHashSlice) Less(i, j int) bool {
	older, newer := p[i], p[j]
	return newer.timeStamp.After(older.timeStamp)
}

// Swap exchanges the commits at positions i and j.
func (p gitHashSlice) Swap(i, j int) {
	tmp := p[i]
	p[i] = p[j]
	p[j] = tmp
}
| |
// GitBranch describes one branch of a Git repository. It is serialized to
// JSON with the keys "name" and "head".
type GitBranch struct {
	// Name is the branch name.
	Name string `json:"name"`
	// Head is the hash the branch currently points at.
	Head string `json:"head"`
}
| |
// includeBranchPrefixes lists the ref prefixes that GetBranches accepts when
// it filters the output of 'git show-ref'. Refs with any other prefix are
// ignored.
var includeBranchPrefixes = []string{"refs/remotes/", "refs/heads/"}
| |
| // GetBranches returns the list of branch heads in a Git repository. |
| // In order to separate local working branches from published branches, only |
| // remote branches in 'origin' are returned. |
| func GetBranches(ctx context.Context, co git.Checkout) ([]*GitBranch, error) { |
| output, err := co.Git(ctx, "show-ref") |
| if err != nil { |
| return nil, fmt.Errorf("Failed to get branch list: %v", err) |
| } |
| branches := []*GitBranch{} |
| lines := strings.Split(output, "\n") |
| for _, line := range lines { |
| if line == "" { |
| continue |
| } |
| parts := strings.SplitN(line, " ", 2) |
| if len(parts) != 2 { |
| return nil, fmt.Errorf("Could not parse output of 'git show-ref'.") |
| } |
| |
| for _, prefix := range includeBranchPrefixes { |
| if strings.HasPrefix(parts[1], prefix) { |
| name := parts[1][len(prefix):] |
| branches = append(branches, &GitBranch{ |
| Name: name, |
| Head: parts[0], |
| }) |
| } |
| } |
| } |
| return branches, nil |
| } |
| |
| // readCommitsFromGit reads the commit history from a Git repository. |
| func readCommitsFromGit(ctx context.Context, gd git.GitDir, branch string) ([]string, map[string]time.Time, error) { |
| output, err := gd.Git(ctx, "log", "--format=format:%H%x20%ci", branch) |
| if err != nil { |
| return nil, nil, fmt.Errorf("Failed to execute git log: %s", err) |
| } |
| lines := strings.Split(output, "\n") |
| gitHashes := make([]*gitHash, 0, len(lines)) |
| timestamps := map[string]time.Time{} |
| for _, line := range lines { |
| parts := strings.SplitN(line, " ", 2) |
| if len(parts) == 2 { |
| t, err := time.Parse("2006-01-02 15:04:05 -0700", parts[1]) |
| if err != nil { |
| return nil, nil, fmt.Errorf("Failed parsing Git log timestamp: %s", err) |
| } |
| t = t.UTC() |
| hash := parts[0] |
| gitHashes = append(gitHashes, &gitHash{hash: hash, timeStamp: t}) |
| timestamps[hash] = t |
| } |
| } |
| sort.Sort(gitHashSlice(gitHashes)) |
| hashes := make([]string, len(gitHashes), len(gitHashes)) |
| for i, h := range gitHashes { |
| hashes[i] = h.hash |
| } |
| return hashes, timestamps, nil |
| } |
| |
| // GetBranchCommits gets all the commits in the given branch and directory in topological order |
| // and only with the first parent (omitting commits from branches that are merged in). |
| // The earliest commits are returned first. |
| // Note: Primarily used for testing and will probably be removed in the future. |
| func GetBranchCommits(ctx context.Context, co git.Checkout, branch string) ([]*vcsinfo.IndexCommit, error) { |
| output, err := co.Git(ctx, "log", "--format=format:%H%x20%ci", "--first-parent", "--topo-order", "--reverse", branch) |
| if err != nil { |
| return nil, fmt.Errorf("Failed to execute git log: %s", err) |
| } |
| lines := strings.Split(output, "\n") |
| ret := make([]*vcsinfo.IndexCommit, 0, len(lines)) |
| for _, line := range lines { |
| parts := strings.SplitN(line, " ", 2) |
| if len(parts) == 2 { |
| t, err := time.Parse("2006-01-02 15:04:05 -0700", parts[1]) |
| if err != nil { |
| return nil, fmt.Errorf("Failed parsing Git log timestamp: %s", err) |
| } |
| t = t.UTC() |
| hash := parts[0] |
| ret = append(ret, &vcsinfo.IndexCommit{ |
| Hash: hash, |
| Timestamp: t, |
| Index: len(ret), |
| }) |
| } |
| } |
| return ret, nil |
| } |
| |
| func readCommitsFromGitAllBranches(ctx context.Context, gd git.Checkout) ([]string, map[string]time.Time, error) { |
| branches, err := GetBranches(ctx, gd) |
| if err != nil { |
| return nil, nil, fmt.Errorf("Could not read commits; unable to get branch list: %v", err) |
| } |
| timestamps := map[string]time.Time{} |
| for _, b := range branches { |
| _, ts, err := readCommitsFromGit(ctx, gd, b.Name) |
| if err != nil { |
| return nil, nil, err |
| } |
| for k, v := range ts { |
| timestamps[k] = v |
| } |
| } |
| gitHashes := make([]*gitHash, len(timestamps), len(timestamps)) |
| i := 0 |
| for h, t := range timestamps { |
| gitHashes[i] = &gitHash{hash: h, timeStamp: t} |
| i++ |
| } |
| sort.Sort(gitHashSlice(gitHashes)) |
| hashes := make([]string, len(timestamps), len(timestamps)) |
| for i, h := range gitHashes { |
| hashes[i] = h.hash |
| } |
| return hashes, timestamps, nil |
| } |
| |
| // Ensure that GitInfo implements vcsinfo.VCS. |
| var _ vcsinfo.VCS = &GitInfo{} |