blob: 76eeb53caf103df08e1b8df33f168991f38e1554 [file] [log] [blame]
package gitiles
import (
// Utilities for working with Gitiles.
// URL templates (filled in with fmt.Sprintf), timestamp layouts and
// rate-limit settings used when talking to Gitiles.
const (
	// COMMIT_URL is the template for a single commit's JSON; it takes the
	// repo URL and a ref.
	COMMIT_URL = "%s/+/%s?format=JSON"
	// DATE_FORMAT_NO_TZ is the time.Parse layout used for commit timestamps
	// which carry no UTC offset.
	DATE_FORMAT_NO_TZ = "Mon Jan 02 15:04:05 2006"
	// DATE_FORMAT_TZ is the time.Parse layout used for commit timestamps
	// which end in a numeric UTC offset.
	DATE_FORMAT_TZ = "Mon Jan 02 15:04:05 2006 -0700"
	// DOWNLOAD_URL is the template for fetching a path as text; it takes the
	// repo URL, a ref and a path within the repo.
	DOWNLOAD_URL = "%s/+/%s/%s?format=TEXT"
	// LOG_URL is the template for a commit log in JSON; it takes the repo URL
	// and a log expression.
	LOG_URL = "%s/+log/%s?format=JSON"
	// REFS_URL is the template for listing branch heads; it takes the repo
	// URL. "%%" is a literal "%", so the formatted URL contains "%2F".
	REFS_URL = "%s/+refs%%2Fheads?format=JSON"
	// TAGS_URL is the template for listing tags; it takes the repo URL.
	TAGS_URL = "%s/+refs%%2Ftags?format=JSON"
	// These were copied from the defaults used by gitfs:
	// MAX_QPS is the sustained request rate handed to rate.NewLimiter by
	// NewRepo.
	MAX_QPS = rate.Limit(4.0)
var (
	// ErrStopIteration is a sentinel error which a log callback may return to
	// end iteration early; logHelper then returns nil instead of an error.
	ErrStopIteration = errors.New("stop iteration")
// Repo is an object used for interacting with a single Git repo using Gitiles.
type Repo struct {
	// client performs every HTTP request issued by this Repo.
	client *http.Client
	// rl is waited on before each request to throttle traffic to the server.
	rl *rate.Limiter
	// URL is the base URL of the repo; it is substituted into the *_URL
	// templates to build request URLs.
	URL string
// NewRepo creates and returns a new Repo object.
func NewRepo(url string, c *http.Client) *Repo {
// TODO(borenet):Stop supporting a nil client; we should enforce that we
// always use an authenticated client to talk to Gitiles.
if c == nil {
c = httputils.NewTimeoutClient()
return &Repo{
client: c,
rl: rate.NewLimiter(MAX_QPS, MAX_BURST),
URL: url,
// get executes a GET request to the given URL, returning the http.Response.
func (r *Repo) get(ctx context.Context, url string) (*http.Response, error) {
// Respect the rate limit.
if err := r.rl.Wait(ctx); err != nil {
return nil, err
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return nil, err
req = req.WithContext(ctx)
resp, err := r.client.Do(req)
if err != nil {
return nil, err
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("Request got status %q", resp.Status)
return resp, nil
// getJson executes a GET request to the given URL, reads the response and
// unmarshals it to the given destination.
func (r *Repo) getJson(ctx context.Context, url string, dest interface{}) error {
resp, err := r.get(ctx, url)
if err != nil {
return err
defer util.Close(resp.Body)
b, err := ioutil.ReadAll(resp.Body)
if err != nil {
return skerr.Fmt("Failed to read response: %s", err)
// Remove the first line.
b = b[4:]
return skerr.Wrap(json.Unmarshal(b, dest))
// ReadFileAtRef reads the given file at the given ref.
func (r *Repo) ReadFileAtRef(ctx context.Context, srcPath, ref string, w io.Writer) error {
resp, err := r.get(ctx, fmt.Sprintf(DOWNLOAD_URL, r.URL, ref, srcPath))
if err != nil {
return err
defer util.Close(resp.Body)
d := base64.NewDecoder(base64.StdEncoding, resp.Body)
if _, err := io.Copy(w, d); err != nil {
return err
return nil
// ReadFile reads the current version of the given file from the master branch
// of the Repo.
func (r *Repo) ReadFile(ctx context.Context, srcPath string, w io.Writer) error {
return r.ReadFileAtRef(ctx, srcPath, "master", w)
// DownloadFile downloads the current version of the given file from the master
// branch of the Repo.
func (r *Repo) DownloadFile(ctx context.Context, srcPath, dstPath string) error {
return util.WithWriteFile(dstPath, func(w io.Writer) error {
return r.ReadFile(ctx, srcPath, w)
// DownloadFileAtRef downloads the given file at the given ref.
func (r *Repo) DownloadFileAtRef(ctx context.Context, srcPath, ref, dstPath string) error {
return util.WithWriteFile(dstPath, func(w io.Writer) error {
return r.ReadFileAtRef(ctx, srcPath, ref, w)
// ListDirAtRef reads the given directory at the given ref. Returns a slice of
// file names and a slice of dir names, relative to the given directory, or any
// error which occurred.
func (r *Repo) ListDirAtRef(ctx context.Context, dir, ref string) ([]string, []string, error) {
path := strings.TrimSuffix(dir, "/")
buf := bytes.Buffer{}
if err := r.ReadFileAtRef(ctx, path, ref, &buf); err != nil {
return nil, nil, err
files := []string{}
dirs := []string{}
for _, line := range strings.Split(strings.TrimSpace(buf.String()), "\n") {
// Lines are formatted as follows:
// mode tree|blob hash name
fields := strings.Fields(line)
if len(fields) != 4 {
return nil, nil, fmt.Errorf("Got invalid response from gitiles. Expected format \"mode tree|blob hash name\" but got:\n %s", buf.String())
if fields[1] == "tree" {
dirs = append(dirs, fields[3])
} else if fields[1] == "blob" {
files = append(files, fields[3])
} else {
return nil, nil, fmt.Errorf("Got invalid response from gitiles. Expected format \"mode tree|blob hash name\" but got %q instead of \"tree\" or \"blob\".", fields[1])
return files, dirs, nil
// ListDir reads the given directory on the master branch. Returns a slice of
// file names and a slice of dir names, relative to the given directory, or any
// error which occurred.
func (r *Repo) ListDir(ctx context.Context, dir string) ([]string, []string, error) {
return r.ListDirAtRef(ctx, dir, "master")
// ResolveRef resolves the given ref to a commit hash.
func (r *Repo) ResolveRef(ctx context.Context, ref string) (string, error) {
commit, err := r.Details(ctx, ref)
if err != nil {
return "", err
return commit.Hash, nil
// ListFilesRecursiveAtRef returns a list of all file paths, relative to the
// given directory, under the given directory at the given ref.
func (r *Repo) ListFilesRecursiveAtRef(ctx context.Context, topDir, ref string) ([]string, error) {
// First, resolve the given ref to a commit hash to ensure that we
// return consistent results even if the ref changes between requests.
hash, err := r.ResolveRef(ctx, ref)
if err != nil {
return nil, err
// List files recursively.
rv := []string{}
var helper func(string) error
helper = func(dir string) error {
files, dirs, err := r.ListDirAtRef(ctx, dir, hash)
if err != nil {
return err
for _, f := range files {
rv = append(rv, strings.TrimPrefix(dir+"/"+f, topDir+"/"))
for _, d := range dirs {
if err := helper(dir + "/" + d); err != nil {
return err
return nil
if err := helper(topDir); err != nil {
return nil, err
return rv, nil
// ListFilesRecursive returns a list of all file paths, relative to the given
// directory, under the given directory on the master branch.
func (r *Repo) ListFilesRecursive(ctx context.Context, dir string) ([]string, error) {
return r.ListFilesRecursiveAtRef(ctx, dir, "master")
// Author is the JSON shape of a person-plus-timestamp record attached to a
// commit. Commit uses it for both its "author" and "committer" fields.
type Author struct {
	Name string `json:"name"`
	Email string `json:"email"`
	// Time is kept as the raw string; commitToLongCommit parses it with
	// DATE_FORMAT_TZ or DATE_FORMAT_NO_TZ.
	Time string `json:"time"`
// Commit is the JSON shape of a single commit, as decoded by Details and as
// found in the entries of a Log.
type Commit struct {
	// Commit is the commit's hash; it becomes the Hash of the converted
	// vcsinfo.LongCommit.
	Commit string `json:"commit"`
	// Parents holds the hashes of the parent commits.
	Parents []string `json:"parents"`
	Author *Author `json:"author"`
	// Committer supplies the timestamp used for the converted commit.
	Committer *Author `json:"committer"`
	// Message is the full commit message; its first line is used as the
	// subject.
	Message string `json:"message"`
// Log is the JSON shape of one page of a commit log response.
type Log struct {
	// Log holds the commits in this page.
	Log []*Commit `json:"log"`
	// Next, when non-empty, is passed back as the "s" query parameter to
	// request the following page; an empty value ends pagination.
	Next string `json:"next"`
func commitToLongCommit(c *Commit) (*vcsinfo.LongCommit, error) {
var ts time.Time
var err error
if strings.Contains(c.Committer.Time, " +") || strings.Contains(c.Committer.Time, " -") {
ts, err = time.Parse(DATE_FORMAT_TZ, c.Committer.Time)
} else {
ts, err = time.Parse(DATE_FORMAT_NO_TZ, c.Committer.Time)
if err != nil {
return nil, err
split := strings.Split(c.Message, "\n")
subject := split[0]
split = split[1:]
body := ""
if len(split) > 1 && split[0] == "" {
split = split[1:]
if len(split) > 1 {
body = strings.Join(split, "\n")
return &vcsinfo.LongCommit{
ShortCommit: &vcsinfo.ShortCommit{
Hash: c.Commit,
Author: fmt.Sprintf("%s (%s)", c.Author.Name, c.Author.Email),
Subject: subject,
Parents: c.Parents,
Body: body,
Timestamp: ts,
}, nil
// Details returns a vcsinfo.LongCommit for the given commit.
func (r *Repo) Details(ctx context.Context, ref string) (*vcsinfo.LongCommit, error) {
var c Commit
if err := r.getJson(ctx, fmt.Sprintf(COMMIT_URL, r.URL, ref), &c); err != nil {
return nil, err
return commitToLongCommit(&c)
// LogOption represents an optional parameter to a Log function. Its string
// value is a "key=value" query parameter which logHelper appends to the
// request URL after an "&".
type LogOption string
// LogReverse is a LogOption which indicates that the commits in the Log should
// be returned in reverse order from the typical "git log" ordering, ie. each
// commit's parents appear before the commit itself.
func LogReverse() LogOption {
return LogOption("reverse=true")
// LogBatchSize is a LogOption which indicates the number of commits which
// should be included in each batch of commits returned by Log.
func LogBatchSize(n int) LogOption {
return LogOption(fmt.Sprintf("n=%d", n))
// logHelper is used to perform requests which are equivalent to "git log".
// Loads commits in batches and calls the given function for each batch of
// commits. If the function returns an error, iteration stops, and the error is
// returned, unless it was ErrStopIteration.
func (r *Repo) logHelper(ctx context.Context, url string, fn func(context.Context, []*vcsinfo.LongCommit) error, opts ...LogOption) error {
for _, opt := range opts {
url += "&" + string(opt)
start := ""
for {
var l Log
u := url
if start != "" {
u += "&s=" + start
if err := r.getJson(ctx, u, &l); err != nil {
return err
// Convert to vcsinfo.LongCommit.
commits := make([]*vcsinfo.LongCommit, 0, len(l.Log))
for _, c := range l.Log {
vc, err := commitToLongCommit(c)
if err != nil {
return err
commits = append(commits, vc)
if err := fn(ctx, commits); err == ErrStopIteration {
return nil
} else if err != nil {
return err
if l.Next == "" {
return nil
} else {
start = l.Next
// Log returns Gitiles' equivalent to "git log" for the given expression.
func (r *Repo) Log(ctx context.Context, logExpr string, opts ...LogOption) ([]*vcsinfo.LongCommit, error) {
rv := []*vcsinfo.LongCommit{}
url := fmt.Sprintf(LOG_URL, r.URL, logExpr)
if err := r.logHelper(ctx, url, func(ctx context.Context, commits []*vcsinfo.LongCommit) error {
rv = append(rv, commits...)
return nil
}, opts...); err != nil {
return nil, err
return rv, nil
// LogLinear is equivalent to "git log --first-parent --ancestry-path",
// ie. it only returns commits which are on the direct path from A to B, and
// only on the "main" branch. This is as opposed to "git log" which
// returns all commits which are ancestors of 'to' but not 'from'.
func (r *Repo) LogLinear(ctx context.Context, from, to string, opts ...LogOption) ([]*vcsinfo.LongCommit, error) {
// Retrieve the normal "git log".
commits, err := r.Log(ctx, git.LogFromTo(from, to), opts...)
if err != nil {
return nil, err
if len(commits) == 0 {
return commits, nil
// Now filter to only those commits which are on the direct path.
commitsMap := make(map[string]*vcsinfo.LongCommit, len(commits))
for _, commit := range commits {
commitsMap[commit.Hash] = commit
isDescendant := make(map[string]bool, len(commits))
var search func(string) bool
search = func(hash string) bool {
// Shortcut if we've already searched this commit.
if rv, ok := isDescendant[hash]; ok {
return rv
// If the commit isn't in our list, we can't include it.
commit, ok := commitsMap[hash]
if !ok {
isDescendant[hash] = false
return false
// The commit is on the ancestry path if it is reachable from
// "to" and a descendant of "from". The former case is handled
// by vanilla "git log", so we just need to find the commits
// which have "from" as an ancestor.
// If "from" is a parent of this commit, it's on the ancestry
// path.
if util.In(from, commit.Parents) {
isDescendant[hash] = true
return true
// If the first parent of this commit is on the direct line,
// then this commit is as well.
if search(commit.Parents[0]) {
isDescendant[hash] = true
return true
return false
rv := make([]*vcsinfo.LongCommit, 0, len(commits))
for _, commit := range commits {
if isDescendant[commit.Hash] {
rv = append(rv, commit)
return rv, nil
// LogFn runs the given function for each commit in the log for the given
// expression. It stops when ErrStopIteration is returned.
func (r *Repo) LogFn(ctx context.Context, logExpr string, fn func(context.Context, *vcsinfo.LongCommit) error, opts ...LogOption) error {
return r.LogFnBatch(ctx, logExpr, func(ctx context.Context, commits []*vcsinfo.LongCommit) error {
for _, c := range commits {
if err := fn(ctx, c); err != nil {
return err
return nil
}, opts...)
// LogFnBatch is the same as LogFn but it runs the given function over batches
// of commits.
func (r *Repo) LogFnBatch(ctx context.Context, logExpr string, fn func(context.Context, []*vcsinfo.LongCommit) error, opts ...LogOption) error {
url := fmt.Sprintf(LOG_URL, r.URL, logExpr)
return r.logHelper(ctx, url, fn, opts...)
// Ref represents a single ref, as returned by the API.
type Ref struct {
	// Value is used by Branches as the head of the branch.
	Value string `json:"value"`
// RefsMap is the result of a request to REFS_URL. Branches uses each key as
// a branch name and the matching Ref's Value as that branch's head.
type RefsMap map[string]Ref
// Branches returns the list of branches in the repo.
func (r *Repo) Branches(ctx context.Context) ([]*git.Branch, error) {
branchMap := RefsMap{}
if err := r.getJson(ctx, fmt.Sprintf(REFS_URL, r.URL), &branchMap); err != nil {
return nil, err
rv := make([]*git.Branch, 0, len(branchMap))
for branch, v := range branchMap {
rv = append(rv, &git.Branch{
Name: branch,
Head: v.Value,
return rv, nil
// Tags returns the list of tags in the repo. The returned map has tag names as
// keys and commit hashes as values.
func (r *Repo) Tags(ctx context.Context) (map[string]string, error) {
tags := map[string]struct {
Value string `json:"value"`
Peeled string `json:"peeled"`
if err := r.getJson(ctx, fmt.Sprintf(TAGS_URL, r.URL), &tags); err != nil {
return nil, err
rv := make(map[string]string, len(tags))
for k, tag := range tags {
rv[k] = tag.Value
return rv, nil