blob: 3fed682ee3c9e03d0befb8432e5283855162fce7 [file] [log] [blame]
// Package gitinfo enables querying info from Git repository using git and a local checkout.
package gitinfo
import (
"context"
"fmt"
"os"
"path"
"regexp"
"sort"
"strconv"
"strings"
"sync"
"time"
"go.skia.org/infra/go/exec"
"go.skia.org/infra/go/git"
"go.skia.org/infra/go/git/git_common"
"go.skia.org/infra/go/skerr"
"go.skia.org/infra/go/sklog"
"go.skia.org/infra/go/vcsinfo"
)
// commitLineRe matches one line of commit log and captures hash, author and
// subject groups.
var commitLineRe = regexp.MustCompile(`([0-9a-f]{40}),([^,\n]+),(.+)$`)
// GitInfo allows querying a Git repo.
type GitInfo struct {
dir git.Checkout
hashes []string
timestamps map[string]time.Time // The git hash is the key.
detailsCache map[string]*vcsinfo.LongCommit // The git hash is the key.
firstCommit string
// Any access to hashes or timestamps must be protected.
mutex sync.Mutex
}
// GetBranch implements the vcsinfo.VCS interface.
func (g *GitInfo) GetBranch() string {
// This was added later and we have implicitly assumed that we are tracking
// the main branch.
return git.MasterBranch
}
// NewGitInfo creates a new GitInfo for the Git repository found in directory
// dir. If pull is true then a git pull is done on the repo before querying it
// for history.
func NewGitInfo(ctx context.Context, dir string, pull, allBranches bool) (*GitInfo, error) {
g := &GitInfo{
dir: git.CheckoutDir(dir),
hashes: []string{},
detailsCache: map[string]*vcsinfo.LongCommit{},
}
return g, g.Update(ctx, pull, allBranches)
}
// Clone creates a new GitInfo by running "git clone" in the given directory.
func Clone(ctx context.Context, repoUrl, dir string, allBranches bool) (*GitInfo, error) {
gitPath, _, _, err := git_common.FindGit(ctx)
if err != nil {
return nil, skerr.Wrap(err)
}
if _, err := exec.RunSimple(ctx, fmt.Sprintf("%s clone %s %s", gitPath, repoUrl, dir)); err != nil {
return nil, fmt.Errorf("Failed to clone %s into %s: %s", repoUrl, dir, err)
}
return NewGitInfo(ctx, dir, false, allBranches)
}
// CloneOrUpdate creates a new GitInfo by running "git clone" or "git pull"
// depending on whether the repo already exists.
func CloneOrUpdate(ctx context.Context, repoUrl, dir string, allBranches bool) (*GitInfo, error) {
gitDir := path.Join(dir, ".git")
_, err := os.Stat(gitDir)
if err == nil {
return NewGitInfo(ctx, dir, true, allBranches)
}
if os.IsNotExist(err) {
return Clone(ctx, repoUrl, dir, allBranches)
}
return nil, err
}
// Update refreshes the history that GitInfo stores for the repo. If pull is
// true then git pull is performed before refreshing.
func (g *GitInfo) Update(ctx context.Context, pull, allBranches bool) error {
g.mutex.Lock()
defer g.mutex.Unlock()
sklog.Info("Beginning Update.")
if pull {
if _, err := g.dir.Git(ctx, "pull"); err != nil {
return fmt.Errorf("Failed to sync to HEAD: %s", err)
}
}
sklog.Info("Finished pull.")
var hashes []string
var timestamps map[string]time.Time
var err error
if allBranches {
hashes, timestamps, err = readCommitsFromGitAllBranches(ctx, g.dir)
} else {
hashes, timestamps, err = readCommitsFromGit(ctx, g.dir, "HEAD")
}
sklog.Infof("Finished reading commits: %s", g.dir)
if err != nil {
return fmt.Errorf("Failed to read commits from: %s : %s", g.dir, err)
}
g.hashes = hashes
g.timestamps = timestamps
g.firstCommit, err = g.InitialCommit(ctx)
if err != nil {
return fmt.Errorf("Failed to get initial commit: %s", err)
}
return nil
}
// Dir returns the checkout dir of the GitInfo..
func (g *GitInfo) Dir() string {
return g.dir.Dir()
}
// Details returns more information than ShortCommit about a given commit.
// See the vcsinfo.VCS interface for details.
func (g *GitInfo) Details(ctx context.Context, hash string, includeBranchInfo bool) (*vcsinfo.LongCommit, error) {
g.mutex.Lock()
defer g.mutex.Unlock()
return g.details(ctx, hash, includeBranchInfo)
}
// See the vcsinfo.VCS interface for details.
func (g *GitInfo) DetailsMulti(ctx context.Context, hashes []string, includeBranchInfo bool) ([]*vcsinfo.LongCommit, error) {
g.mutex.Lock()
defer g.mutex.Unlock()
ret := make([]*vcsinfo.LongCommit, len(hashes))
for idx, hash := range hashes {
var err error
if ret[idx], err = g.details(ctx, hash, includeBranchInfo); err != nil {
return nil, err
}
}
return ret, nil
}
// details returns more information than ShortCommit about a given commit.
// See the vcsinfo.VCS interface for details.
//
// Caller is responsible for locking the mutex.
func (g *GitInfo) details(ctx context.Context, hash string, includeBranchInfo bool) (*vcsinfo.LongCommit, error) {
if c, ok := g.detailsCache[hash]; ok {
// Return the cached value if the branchInfo request matches.
if !includeBranchInfo || (len(c.Branches) > 0) {
return c, nil
}
}
output, err := g.dir.Git(ctx, "log", "-n", "1", "--format=format:%H%n%P%n%an%x20(%ae)%n%s%n%b", hash)
if err != nil {
return nil, fmt.Errorf("Failed to execute Git: %s", err)
}
lines := strings.SplitN(output, "\n", 5)
if len(lines) != 5 {
return nil, fmt.Errorf("Failed to parse output of 'git log'.")
}
branches := map[string]bool{}
if includeBranchInfo {
branches, err = g.getBranchesForCommit(ctx, hash)
if err != nil {
return nil, err
}
}
var parents []string
if lines[1] != "" {
parents = strings.Split(lines[1], " ")
}
c := vcsinfo.LongCommit{
ShortCommit: &vcsinfo.ShortCommit{
Hash: lines[0],
Author: lines[2],
Subject: lines[3],
},
Parents: parents,
Body: lines[4],
Timestamp: g.timestamps[hash],
Branches: branches,
}
g.detailsCache[hash] = &c
return &c, nil
}
func (g *GitInfo) Reset(ctx context.Context, ref string) error {
_, err := g.dir.Git(ctx, "reset", "--hard", ref)
if err != nil {
return fmt.Errorf("Failed to roll back/forward to commit %s: %s", ref, err)
}
return nil
}
func (g *GitInfo) Checkout(ctx context.Context, ref string) error {
if _, err := g.dir.Git(ctx, "checkout", ref); err != nil {
return fmt.Errorf("Failed to checkout %s: %s", ref, err)
}
return nil
}
// getBranchesForCommit returns a string set with all the branches that can reach
// the commit with the given hash.
// TODO(stephana): Speed up this method, there are either better ways to do this
// in git or the results can be cached.
func (g *GitInfo) getBranchesForCommit(ctx context.Context, hash string) (map[string]bool, error) {
output, err := g.dir.Git(ctx, "branch", "--all", "--list", "--contains", hash)
if err != nil {
return nil, fmt.Errorf("Failed to get branches for commit %s: %s", hash, err)
}
lines := strings.Split(strings.TrimSpace(output), "\n")
ret := map[string]bool{}
for _, line := range lines {
l := strings.TrimSpace(line)
if l != "" {
// Splitting the line to filter out the '*' that marks the active branch.
parts := strings.Split(l, " ")
ret[parts[len(parts)-1]] = true
}
}
return ret, nil
}
// RevList returns the results of "git rev-list".
func (g *GitInfo) RevList(ctx context.Context, args ...string) ([]string, error) {
g.mutex.Lock()
defer g.mutex.Unlock()
output, err := g.dir.Git(ctx, append([]string{"rev-list"}, args...)...)
if err != nil {
return nil, fmt.Errorf("git rev-list failed: %v", err)
}
res := strings.Trim(output, "\n")
if res == "" {
return []string{}, nil
}
return strings.Split(res, "\n"), nil
}
// From returns all commits from 'start' to HEAD.
func (g *GitInfo) From(start time.Time) []string {
g.mutex.Lock()
defer g.mutex.Unlock()
ret := []string{}
for _, h := range g.hashes {
if g.timestamps[h].After(start) {
ret = append(ret, h)
}
}
return ret
}
// Range returns all commits from the half open interval ['begin', 'end'), i.e.
// includes 'begin' and excludes 'end'.
func (g *GitInfo) Range(begin, end time.Time) []*vcsinfo.IndexCommit {
g.mutex.Lock()
defer g.mutex.Unlock()
ret := []*vcsinfo.IndexCommit{}
first := sort.Search(len(g.hashes), func(i int) bool {
ts := g.timestamps[g.hashes[i]]
return ts.After(begin) || ts.Equal(begin)
})
if first == len(g.timestamps) {
return ret
}
for i, h := range g.hashes[first:] {
if g.timestamps[h].Before(end) {
ret = append(ret, &vcsinfo.IndexCommit{
Hash: h,
Index: first + i,
Timestamp: g.timestamps[h],
})
} else {
break
}
}
return ret
}
// LastNIndex returns the last N commits.
func (g *GitInfo) LastNIndex(N int) []*vcsinfo.IndexCommit {
g.mutex.Lock()
defer g.mutex.Unlock()
var hashes []string
offset := 0
if len(g.hashes) < N {
hashes = g.hashes
} else {
hashes = g.hashes[len(g.hashes)-N:]
offset = len(g.hashes) - N
}
ret := []*vcsinfo.IndexCommit{}
for i, h := range hashes {
ret = append(ret, &vcsinfo.IndexCommit{
Hash: h,
Index: i + offset,
Timestamp: g.timestamps[h],
})
}
return ret
}
// IndexOf returns the index of given hash as counted from the first commit in
// this branch by 'rev-list'. The index is 0 based.
func (g *GitInfo) IndexOf(ctx context.Context, hash string) (int, error) {
// Count the lines from running:
// git rev-list --count <first-commit>..hash.
output, err := g.RevList(ctx, "--count", git.LogFromTo(g.firstCommit, hash))
if err != nil {
return 0, fmt.Errorf("git rev-list failed: %s", err)
}
if len(output) != 1 {
return 0, fmt.Errorf("git rev-list wrong size output: %s", err)
}
n, err := strconv.Atoi(output[0])
if err != nil {
return 0, fmt.Errorf("Didn't get a number: %s", err)
}
return n, nil
}
// ByIndex returns a LongCommit describing the commit
// at position N, as ordered in the current branch.
//
// Does not make sense if readCommitsFromGitAllBranches has been
// called.
func (g *GitInfo) ByIndex(ctx context.Context, N int) (*vcsinfo.LongCommit, error) {
g.mutex.Lock()
defer g.mutex.Unlock()
numHashes := len(g.hashes)
if N < 0 || N >= numHashes {
return nil, fmt.Errorf("Hash index not found: %d", N)
}
return g.details(ctx, g.hashes[N], false)
}
// LastN returns the last N commits.
func (g *GitInfo) LastN(ctx context.Context, N int) []string {
g.mutex.Lock()
defer g.mutex.Unlock()
if len(g.hashes) < N {
return g.hashes[0:len(g.hashes)]
} else {
return g.hashes[len(g.hashes)-N:]
}
}
// This is a temporary performance enhancement for Perf.
// It will be removed once Perf moves to gitstore.
func (g *GitInfo) TimestampAtIndex(N int) (time.Time, error) {
if N < 0 || N >= len(g.hashes) {
return time.Time{}, fmt.Errorf("Hash index not found: %d", N)
}
g.mutex.Lock()
defer g.mutex.Unlock()
return g.timestamps[g.hashes[N]], nil
}
// Timestamp returns the timestamp for the given hash.
func (g *GitInfo) Timestamp(hash string) time.Time {
g.mutex.Lock()
defer g.mutex.Unlock()
return g.timestamps[hash]
}
// Log returns a --name-only short log for every commit in (begin, end].
//
// If end is "" then it returns just the short log for the single commit at
// begin.
//
// Example response:
//
// commit b7988a21fdf23cc4ace6145a06ea824aa85db099
// Author: Joe Gregorio <jcgregorio@google.com>
// Date: Tue Aug 5 16:19:48 2014 -0400
//
// A description of the commit.
//
// perf/go/skiaperf/perf.go
// perf/go/types/types.go
// perf/res/js/logic.js
func (g *GitInfo) Log(ctx context.Context, begin, end string) (string, error) {
command := []string{"log", "--name-only"}
hashrange := begin
if end != "" {
hashrange += ".." + end
command = append(command, hashrange)
} else {
command = append(command, "-n", "1", hashrange)
}
output, err := g.dir.Git(ctx, command...)
if err != nil {
return "", err
}
return output, nil
}
// LogFine is the same as Log() but appends all the 'args' to the Log
// request to allow finer control of the log output. I.e. you could call:
//
// LogFine(begin, end, "--format=format:%ct", "infra/bots/assets/skp/VERSION")
func (g *GitInfo) LogFine(ctx context.Context, begin, end string, args ...string) (string, error) {
command := []string{"log"}
hashrange := begin
if end != "" {
hashrange += ".." + end
command = append(command, hashrange)
} else {
command = append(command, "-n", "1", hashrange)
}
command = append(command, args...)
output, err := g.dir.Git(ctx, command...)
if err != nil {
return "", err
}
return output, nil
}
// LogArgs is the same as Log() but appends all the 'args' to the Log
// request to allow finer control of the log output. I.e. you could call:
//
// LogArgs("--since=2015-10-24", "--format=format:%ct", "infra/bots/assets/skp/VERSION")
func (g *GitInfo) LogArgs(ctx context.Context, args ...string) (string, error) {
command := []string{"log"}
command = append(command, args...)
output, err := g.dir.Git(ctx, command...)
if err != nil {
return "", err
}
return output, nil
}
// FullHash gives the full commit hash for the given ref.
func (g *GitInfo) FullHash(ctx context.Context, ref string) (string, error) {
output, err := g.dir.Git(ctx, "rev-parse", fmt.Sprintf("%s^{commit}", ref))
if err != nil {
return "", fmt.Errorf("Failed to obtain full hash: %s", err)
}
return strings.Trim(output, "\n"), nil
}
// InitialCommit returns the hash of the initial commit.
func (g *GitInfo) InitialCommit(ctx context.Context) (string, error) {
output, err := g.dir.Git(ctx, "rev-list", "--max-parents=0", "--first-parent", "HEAD")
if err != nil {
return "", fmt.Errorf("Failed to determine initial commit: %v", err)
}
return strings.Trim(output, "\n"), nil
}
// GetBranches returns a slice of strings naming the branches in the repo.
func (g *GitInfo) GetBranches(ctx context.Context) ([]*GitBranch, error) {
return GetBranches(ctx, g.dir)
}
// ShortCommits stores a slice of ShortCommit struct.
type ShortCommits struct {
Commits []*vcsinfo.ShortCommit
}
// ShortList returns a slice of ShortCommit for every commit in (begin, end].
func (g *GitInfo) ShortList(ctx context.Context, begin, end string) (*ShortCommits, error) {
command := []string{"log", "--pretty='%H,%an,%s", begin + ".." + end}
output, err := g.dir.Git(ctx, command...)
if err != nil {
return nil, err
}
ret := &ShortCommits{
Commits: []*vcsinfo.ShortCommit{},
}
for _, line := range strings.Split(output, "\n") {
match := commitLineRe.FindStringSubmatch(line)
if match == nil {
// This could happen if the subject has new line, in which case we truncate it and ignore the remainder.
continue
}
commit := &vcsinfo.ShortCommit{
Hash: match[1],
Author: match[2],
Subject: match[3],
}
ret.Commits = append(ret.Commits, commit)
}
return ret, nil
}
// gitHash represents information on a single Git commit.
type gitHash struct {
hash string
timeStamp time.Time
}
type gitHashSlice []*gitHash
func (p gitHashSlice) Len() int { return len(p) }
func (p gitHashSlice) Less(i, j int) bool { return p[i].timeStamp.Before(p[j].timeStamp) }
func (p gitHashSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
// GitBranch represents a Git branch.
type GitBranch struct {
Name string `json:"name"`
Head string `json:"head"`
}
// includeBranchPrefixes is the list of branch prefixes that we should consider in the
// output of the 'git show-ref' command issued in GetBranches below.
var includeBranchPrefixes = []string{
"refs/remotes/",
"refs/heads/",
}
// GetBranches returns the list of branch heads in a Git repository.
// In order to separate local working branches from published branches, only
// remote branches in 'origin' are returned.
func GetBranches(ctx context.Context, co git.Checkout) ([]*GitBranch, error) {
output, err := co.Git(ctx, "show-ref")
if err != nil {
return nil, fmt.Errorf("Failed to get branch list: %v", err)
}
branches := []*GitBranch{}
lines := strings.Split(output, "\n")
for _, line := range lines {
if line == "" {
continue
}
parts := strings.SplitN(line, " ", 2)
if len(parts) != 2 {
return nil, fmt.Errorf("Could not parse output of 'git show-ref'.")
}
for _, prefix := range includeBranchPrefixes {
if strings.HasPrefix(parts[1], prefix) {
name := parts[1][len(prefix):]
branches = append(branches, &GitBranch{
Name: name,
Head: parts[0],
})
}
}
}
return branches, nil
}
// readCommitsFromGit reads the commit history from a Git repository.
func readCommitsFromGit(ctx context.Context, gd git.GitDir, branch string) ([]string, map[string]time.Time, error) {
output, err := gd.Git(ctx, "log", "--format=format:%H%x20%ci", branch)
if err != nil {
return nil, nil, fmt.Errorf("Failed to execute git log: %s", err)
}
lines := strings.Split(output, "\n")
gitHashes := make([]*gitHash, 0, len(lines))
timestamps := map[string]time.Time{}
for _, line := range lines {
parts := strings.SplitN(line, " ", 2)
if len(parts) == 2 {
t, err := time.Parse("2006-01-02 15:04:05 -0700", parts[1])
if err != nil {
return nil, nil, fmt.Errorf("Failed parsing Git log timestamp: %s", err)
}
t = t.UTC()
hash := parts[0]
gitHashes = append(gitHashes, &gitHash{hash: hash, timeStamp: t})
timestamps[hash] = t
}
}
sort.Sort(gitHashSlice(gitHashes))
hashes := make([]string, len(gitHashes), len(gitHashes))
for i, h := range gitHashes {
hashes[i] = h.hash
}
return hashes, timestamps, nil
}
// GetBranchCommits gets all the commits in the given branch and directory in topological order
// and only with the first parent (omitting commits from branches that are merged in).
// The earliest commits are returned first.
// Note: Primarily used for testing and will probably be removed in the future.
func GetBranchCommits(ctx context.Context, co git.Checkout, branch string) ([]*vcsinfo.IndexCommit, error) {
output, err := co.Git(ctx, "log", "--format=format:%H%x20%ci", "--first-parent", "--topo-order", "--reverse", branch)
if err != nil {
return nil, fmt.Errorf("Failed to execute git log: %s", err)
}
lines := strings.Split(output, "\n")
ret := make([]*vcsinfo.IndexCommit, 0, len(lines))
for _, line := range lines {
parts := strings.SplitN(line, " ", 2)
if len(parts) == 2 {
t, err := time.Parse("2006-01-02 15:04:05 -0700", parts[1])
if err != nil {
return nil, fmt.Errorf("Failed parsing Git log timestamp: %s", err)
}
t = t.UTC()
hash := parts[0]
ret = append(ret, &vcsinfo.IndexCommit{
Hash: hash,
Timestamp: t,
Index: len(ret),
})
}
}
return ret, nil
}
func readCommitsFromGitAllBranches(ctx context.Context, gd git.Checkout) ([]string, map[string]time.Time, error) {
branches, err := GetBranches(ctx, gd)
if err != nil {
return nil, nil, fmt.Errorf("Could not read commits; unable to get branch list: %v", err)
}
timestamps := map[string]time.Time{}
for _, b := range branches {
_, ts, err := readCommitsFromGit(ctx, gd, b.Name)
if err != nil {
return nil, nil, err
}
for k, v := range ts {
timestamps[k] = v
}
}
gitHashes := make([]*gitHash, len(timestamps), len(timestamps))
i := 0
for h, t := range timestamps {
gitHashes[i] = &gitHash{hash: h, timeStamp: t}
i++
}
sort.Sort(gitHashSlice(gitHashes))
hashes := make([]string, len(timestamps), len(timestamps))
for i, h := range gitHashes {
hashes[i] = h.hash
}
return hashes, timestamps, nil
}
// Ensure that GitInfo implements vcsinfo.VCS.
var _ vcsinfo.VCS = &GitInfo{}