// docset keeps track of checkouts of a repository of Markdown documents.
package docset
/*
DocSets operate out of a `workDir`, which is presumed to be a directory with enough space
to handle many checkouts of the Markdown repository. It has the following structure:
{workDir}
/primary/ - The primary checkout of the Markdown repository.
/patches/
{issue_id}-{patchset_id}/ - A patched checkout of the Markdown repository.
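(For example, {workDir}/patches/12345-3/ would hold patchset 3 of issue 12345; the IDs are hypothetical.)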
Each repo should have a directory /site that contains all the documentation in Markdown
and associated files such as PNG images. For example:
site
├── dev
│ ├── contrib
│ │ ├── codereviews.md
│ │ └── index.md
│ └── index.md
├── index.md
├── logo.png
├── roles.md
├── roles.png
├── user
│ ├── api.md
│ ├── download.md
│ ├── index.md
│ ├── issue-tracker.md
│ └── quick
│ ├── index.md
│ └── linux.md
└── xtra
└── index.md
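
A minimal sketch of typical use (the repo URL and workDir below are hypothetical):

	if err := docset.Init(false); err != nil {
		sklog.Fatal(err)
	}
	d, err := docset.NewDocSet(ctx, "/tmp/docserver", "https://example.googlesource.com/docs")
	if err != nil {
		sklog.Fatal(err)
	}
	filename, isRaw, err := d.RawFilename("/user/api")
	body, err := d.Body(filename)
	nav := d.Navigation()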
*/
import (
"bufio"
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io/ioutil"
"net/mail"
"os"
"path"
"path/filepath"
"regexp"
"sort"
"strconv"
"strings"
"sync"
"text/template"
"time"
"github.com/golang/groupcache/lru"
"go.skia.org/infra/docserverk/go/config"
"go.skia.org/infra/go/auth"
"go.skia.org/infra/go/gerrit"
"go.skia.org/infra/go/git"
"go.skia.org/infra/go/git/gitinfo"
"go.skia.org/infra/go/httputils"
"go.skia.org/infra/go/sklog"
"go.skia.org/infra/go/util"
)
const (
MARKDOWN_CACHE_SIZE = 100
)
var (
gc *gerrit.Gerrit
IssueCommittedErr = errors.New("The requested issue is merged.")
// markdownHeader matches the hashes that appear at the beginning of a
// header.
markdownHeader = regexp.MustCompile(`^#+\ `)
)
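// Init creates the Gerrit client used by this package to query issue properties.
// It must be called before NewDocSet, NewDocSetForIssue, or StartCleaner. The
// local flag is passed to auth.NewDefaultTokenSource when building the token
// source for the Gerrit client.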
func Init(local bool) error {
ts, err := auth.NewDefaultTokenSource(local, auth.SCOPE_GERRIT)
if err != nil {
return err
}
client := httputils.DefaultClientConfig().WithTokenSource(ts).With2xxOnly().Client()
gc, err = gerrit.NewGerrit(gerrit.GERRIT_SKIA_URL, client)
return err
}
// DocSet is a single checked out repository of Markdown documents.
type DocSet struct {
// repoDir is the directory the repo is checked out into.
repoDir string
// navigation is the HTML formatted navigation structure for the given repo.
navigation string
// A site map served to the Google crawler.
siteMap string
cache *lru.Cache
mutex sync.Mutex
}
// newDocSet does the core of the work for both NewDocSet and NewDocSetForIssue.
//
// The repo is checked out somewhere under workDir.
// If a valid issue and patchset are supplied then the repo will be patched with that CL.
// If refresh is true then the git repo will be periodically refreshed (git pull).
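//
// For example (hypothetical arguments), newDocSet(ctx, "/work", repo, 12345, 3, false)
// checks out patchset 3 of issue 12345 under /work/patches/12345-3.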
func newDocSet(ctx context.Context, workDir, repo string, issue, patchset int64, refresh bool) (*DocSet, error) {
primaryDir := filepath.Join(workDir, "primary")
issueDir := filepath.Join(workDir, "patches", fmt.Sprintf("%d-%d", issue, patchset))
repoDir := primaryDir
if issue > 0 {
repoDir = issueDir
if _, err := os.Stat(issueDir); err == nil {
d := &DocSet{
repoDir: repoDir,
}
d.BuildNavigation()
return d, nil
}
}
if issue > 0 {
info, err := gc.GetIssueProperties(ctx, issue)
if err != nil {
return nil, fmt.Errorf("Failed to load issue info: %s", err)
}
if info.Committed {
return nil, IssueCommittedErr
}
}
var gi *gitinfo.GitInfo
var err error
if issue > 0 {
gi, err = gitinfo.CloneOrUpdate(ctx, primaryDir, repoDir, false)
} else {
gi, err = gitinfo.CloneOrUpdate(ctx, repo, repoDir, false)
}
if err != nil {
return nil, fmt.Errorf("Failed to CloneOrUpdate repo %q: %s", repo, err)
}
if issue > 0 {
// Run a git fetch for the branch where gerrit stores patches.
//
// refs/changes/46/4546/1
// | | |
// | | +-> Patch set.
// | |
// | +-> Issue ID.
// |
// +-> Last two digits of Issue ID.
issuePostfix := issue % 100
output, err := git.GitDir(repoDir).Git(ctx, "fetch", repo, fmt.Sprintf("refs/changes/%02d/%d/%d", issuePostfix, issue, patchset))
if err != nil {
return nil, fmt.Errorf("Failed to execute Git %q: %s", output, err)
}
err = gi.Checkout(ctx, "FETCH_HEAD")
if err != nil {
return nil, fmt.Errorf("Failed to CloneOrUpdate repo %q: %s", repo, err)
}
}
d := &DocSet{
repoDir: repoDir,
}
d.BuildNavigation()
if refresh {
go func() {
for range time.Tick(config.REFRESH) {
util.LogErr(gi.Update(ctx, true, false))
d.BuildNavigation()
}
}()
}
return d, nil
}
// NewPreviewDocSet creates a new DocSet, one that is not refreshed.
func NewPreviewDocSet() (*DocSet, error) {
// Start from cwd and move up until you find a .git file, then use that dir as repoDir.
dir, err := os.Getwd()
if err != nil {
return nil, fmt.Errorf("Can't find cwd: %s", err)
}
for {
if _, err := os.Stat(filepath.Join(dir, ".git")); os.IsNotExist(err) {
dir = path.Dir(dir)
} else {
break
}
if dir == "/" || dir == "." {
return nil, fmt.Errorf("docserver --preview must be run from within the Git repo.")
}
}
d := &DocSet{
repoDir: dir,
}
d.BuildNavigation()
d.cache = nil
return d, nil
}
// NewDocSet creates a new DocSet, one that is periodically refreshed.
func NewDocSet(ctx context.Context, workDir, repo string) (*DocSet, error) {
return newDocSet(ctx, workDir, repo, -1, -1, true)
}
// NewDocSetForIssue creates a new DocSet patched to the latest patch level of
// the given issue.
//
// The returned DocSet is not periodically refreshed.
func NewDocSetForIssue(ctx context.Context, workDir, repo string, issue int64) (*DocSet, error) {
info, err := gc.GetIssueProperties(ctx, issue)
if err != nil {
return nil, fmt.Errorf("Failed to load issue info: %s", err)
}
patchset := int64(len(info.Revisions))
if patchset == 0 {
return nil, fmt.Errorf("Failed to find a patchset for issue %d.", issue)
}
addr, err := mail.ParseAddress(info.Owner.Email)
if err != nil {
return nil, fmt.Errorf("CL contains invalid author email: %s", err)
}
domain := strings.Split(addr.Address, "@")[1]
if !util.In(domain, config.WHITELIST) {
return nil, fmt.Errorf("User is not authorized to test docset CLs.")
}
return newDocSet(ctx, workDir, repo, issue, patchset, false)
}
// RawFilename returns the absolute filename for the file associated with the
// given url.
//
// The bool returned will be true if the url identifies a raw resource, such as
// a PNG, as opposed to a Markdown file that should be processed.
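//
// For example (hypothetical URLs), RawFilename("/logo.png") returns the path of
// logo.png within the checkout and true, while RawFilename("/user/api") resolves
// to the user/api.md file and returns false.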
func (d *DocSet) RawFilename(url string) (string, bool, error) {
startFilename := filepath.Join(d.repoDir, config.REPO_SUBDIR, url)
endFilename, err := findFile(startFilename)
return endFilename, (startFilename == endFilename), err
}
// Body returns the contents of the given filename.
func (d *DocSet) Body(filename string) ([]byte, error) {
d.mutex.Lock()
defer d.mutex.Unlock()
var err error
var body interface{}
ok := false
if d.cache != nil {
body, ok = d.cache.Get(filename)
}
if !ok {
body, err = ioutil.ReadFile(filename)
if err == nil {
if d.cache != nil {
d.cache.Add(filename, body)
}
}
}
return body.([]byte), err
}
// hasPrefix returns true if p is a prefix of a.
func hasPrefix(a, p []string) bool {
if len(p) > len(a) {
return false
}
for i, s := range p {
if s != a[i] {
return false
}
}
return true
}
// diff determines how many levels of ul's we need to push and pop.
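//
// For example (hypothetical path segments), diff([]string{"user", "api"}, []string{"user", "download"})
// returns (0, 0) since both entries sit in the same directory, while
// diff([]string{"user"}, []string{"user", "quick"}) returns (0, 1), opening one nested list.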
func diff(current, next []string) (int, int) {
// Start by popping off values from the end of 'next' until we get a prefix
// of 'current', which may be the empty list. Use that to calculate how many
// </ul>'s we need to emit.
end := 1
for i := 0; i <= len(next); i++ {
if hasPrefix(current, next[:len(next)-i]) {
end = len(current) - (len(next) - i)
break
}
}
// If we are just adding a file in a new directory then don't end the list.
if len(current) > 0 && len(next) > 0 && end == 1 && current[len(current)-1] == "" {
end = 0
}
// We are always going to begin a new list.
begin := 1
// Unless we are adding in a file in the same directory, in which case do nothing.
if len(current) > 0 && len(next) > 0 && end == 1 && current[len(current)-1] != "" && next[len(next)-1] != "" {
end = 0
begin = 0
}
return end, begin
}
// siteMapTemplate is a self-referential template used to recursively expand over the node tree.
var siteMapTemplate = template.Must(template.New("SITENODE").Parse(`https://skia.org{{.Index.URL}}
{{range .Files}}https://skia.org{{.URL}}
{{end}}{{range .Dirs}}{{template "SITENODE" .}}{{end}}`))
// nodeToSite converts the node to a sitemap.
func nodeToSite(n *node, depth int) string {
b := &bytes.Buffer{}
if err := siteMapTemplate.Execute(b, n); err != nil {
sklog.Errorf("Failed to expand: %s", err)
return ""
}
return b.String()
}
// navTemplate is a self-referential template used to recursively expand over the node tree.
var navTemplate = template.Must(template.New("NODE").Parse(`
<li><a data-path="{{.Index.URL}}" href="{{.Index.URL}}">{{.Index.Name}}</a></li>
<ul class=files>
{{range .Files}}
<li><a data-path="{{.URL}}" href="{{.URL}}">{{.Name}}</a></li>
{{end}}
</ul>
<ul class="dirs depth{{.Index.Depth}}">
{{range .Dirs}}
{{template "NODE" .}}
{{end}}
</ul>`))
// buildSiteMap converts a node tree into a text formatted
// site map.
func buildSiteMap(n *node) string {
return nodeToSite(n, 1)
}
// nodeToHTML converts the node tree rooted at n to HTML. The depth argument is currently unused.
func nodeToHTML(n *node, depth int) string {
b := &bytes.Buffer{}
if err := navTemplate.Execute(b, n); err != nil {
sklog.Errorf("Failed to expand: %s", err)
return ""
}
return b.String()
}
// metadata is the struct for deserializing JSON found in METADATA files.
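//
// A METADATA file might look like (hypothetical content, listing directory and
// file names, without the .md extension, in the order they should appear):
//
//	{
//	  "dirOrder": ["user", "dev"],
//	  "fileOrder": ["download", "api"]
//	}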
type metadata struct {
DirOrder []string `json:"dirOrder"`
FileOrder []string `json:"fileOrder"`
}
// buildNavString converts a node tree into an HTML formatted
// navigation structure.
func buildNavString(n *node) string {
return "\n<ul class=depth0>\n" + nodeToHTML(n, 1) + "</ul>\n"
}
// node is a single directory of site docs.
type node struct {
Index navEntry
Dirs []*node
Files []*navEntry
}
// nodeSlice is for sorting nodes.
type nodeSlice []*node
func (p nodeSlice) Len() int { return len(p) }
func (p nodeSlice) Less(i, j int) bool { return p[i].Index.URL < p[j].Index.URL }
func (p nodeSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
// walk recursively walks the directory tree below path and populates a tree structure of nodes, with URLs computed relative to root.
func walk(root, path string) (*node, error) {
ret := &node{
Dirs: []*node{},
Files: []*navEntry{},
}
// for each directory fill in the navEntry for index.md
rel, _ := filepath.Rel(root, path)
rel = filepath.Clean("/" + rel)
ret.Index = navEntry{
URL: rel,
Name: readTitle(filepath.Join(path, "index.md"), rel),
}
m := &metadata{
DirOrder: []string{},
FileOrder: []string{},
}
// populate all the other files
f, err := os.Open(path)
if err != nil {
return nil, err
}
allFiles, err := f.Readdir(-1)
if err != nil {
return nil, err
}
for _, fi := range allFiles {
// The contents are either files or directories.
if fi.IsDir() {
n, err := walk(root, filepath.Join(path, fi.Name()))
if err != nil {
return nil, err
}
ret.Dirs = append(ret.Dirs, n)
} else if fi.Name() != "index.md" && strings.HasSuffix(fi.Name(), ".md") {
fileRel := filepath.Clean(rel + "/" + fi.Name()[:len(fi.Name())-3])
ret.Files = append(ret.Files, &navEntry{
URL: fileRel,
Name: readTitle(filepath.Join(path, fi.Name()), fileRel),
})
} else if fi.Name() == "METADATA" {
// Load JSON found in METADATA.
metaPath := filepath.Join(path, fi.Name())
f, err := os.Open(metaPath)
if err != nil {
sklog.Warningf("Failed to open %q: %s", metaPath, err)
continue
}
dec := json.NewDecoder(f)
if err := dec.Decode(m); err != nil {
sklog.Warningf("Failed to decode %q: %s", metaPath, err)
}
}
}
// Sort dirs and files, use METADATA if available.
// Pick out the matches in the order they appear, then sort the rest.
// Yes, this is O(n^2), but for very small n.
sortedDirs := []*node{}
for _, name := range m.DirOrder {
for i, n := range ret.Dirs {
if name == filepath.Base(n.Index.URL) {
sortedDirs = append(sortedDirs, n)
ret.Dirs = append(ret.Dirs[:i], ret.Dirs[i+1:]...)
break
}
}
}
sort.Sort(nodeSlice(ret.Dirs))
ret.Dirs = append(sortedDirs, ret.Dirs...)
sortedFiles := []*navEntry{}
for _, name := range m.FileOrder {
for i, n := range ret.Files {
if name == filepath.Base(n.URL) {
sortedFiles = append(sortedFiles, n)
ret.Files = append(ret.Files[:i], ret.Files[i+1:]...)
break
}
}
}
sort.Sort(navEntrySlice(ret.Files))
ret.Files = append(sortedFiles, ret.Files...)
return ret, nil
}
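// printnode logs the node tree rooted at n for debugging, indented by depth.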
func printnode(n *node, depth int) {
sklog.Infof("Node: %*s%#v\n", depth*2, "", n.Index)
for _, f := range n.Files {
sklog.Infof("File: %*s%#v\n", (depth+1)*2, "", *f)
}
for _, d := range n.Dirs {
printnode(d, depth+1)
}
}
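// addDepth sets Index.Depth to depth on n and recursively on all of its subdirectories.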
func addDepth(n *node, depth int) {
n.Index.Depth = depth
for _, d := range n.Dirs {
addDepth(d, depth+1)
}
}
// BuildNavigation builds the Navigation for the DocSet.
func (d *DocSet) BuildNavigation() {
// Walk the directory tree to build the navigation menu.
root := filepath.Join(d.repoDir, config.REPO_SUBDIR)
node, _ := walk(root, root)
addDepth(node, 1)
s := buildNavString(node)
sm := buildSiteMap(node)
d.mutex.Lock()
defer d.mutex.Unlock()
d.cache = lru.New(MARKDOWN_CACHE_SIZE)
d.navigation = s
d.siteMap = sm
}
// Navigation returns the HTML formatted navigation.
func (d *DocSet) Navigation() string {
d.mutex.Lock()
defer d.mutex.Unlock()
return d.navigation
}
// SiteMap returns the txt formatted site map.
func (d *DocSet) SiteMap() string {
d.mutex.Lock()
defer d.mutex.Unlock()
return d.siteMap
}
// issueAndPatch is a regex for extracting the issue number from a directory name
// that is formatted like {issue_id}-{patchset_id}.
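// For example, it matches a directory name like "12345-3" and captures "12345" (the IDs are hypothetical).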
var issueAndPatch = regexp.MustCompile("([0-9]+)-[0-9]+")
// StartCleaner runs forever, periodically checking the status of every issue
// that has been previewed and removing the local checkouts of issues that have
// been merged or can no longer be found.
func StartCleaner(workDir string) {
sklog.Info("Starting Cleaner")
for range time.Tick(config.REFRESH) {
matches, err := filepath.Glob(workDir + "/patches/*")
sklog.Infof("Matches: %v", matches)
if err != nil {
sklog.Errorf("Failed to retrieve list of patched checkouts: %s", err)
continue
}
for _, filename := range matches {
_, file := filepath.Split(filename)
sklog.Info(file)
m := issueAndPatch.FindStringSubmatch(file)
if len(m) < 2 {
continue
}
issue, err := strconv.ParseInt(m[1], 10, 64)
if err != nil {
sklog.Errorf("Failed to parse %q as int: %s", m[1], err)
continue
}
info, err := gc.GetIssueProperties(context.TODO(), issue)
// Delete closed and missing issues.
if err != nil || info.Committed {
if err := os.RemoveAll(filename); err != nil {
sklog.Errorf("Failed to remove %q: %s", filename, err)
}
}
}
}
}
// findFile takes a filename guess and turns it into the real name of a file.
//
// Look for the given file; if it exists then serve it raw with a guess at the
// content type. Otherwise append ".md" and return it as processed Markdown.
// If it is a directory, append "index.md" and return that.
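//
// For example (hypothetical paths), findFile("/work/primary/site/user/api") returns
// "/work/primary/site/user/api.md", and findFile("/work/primary/site/user"), when
// that is a directory, returns "/work/primary/site/user/index.md".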
func findFile(filename string) (string, error) {
if stat, err := os.Stat(filename); err == nil {
if stat.IsDir() {
return findFile(filepath.Join(filename, "./index.md"))
} else {
return filename, nil
}
} else {
if filepath.Ext(filename) == ".md" {
return filename, err
} else {
return findFile(filename + ".md")
}
}
}
// pathOf returns a '/' terminated path for the given filename.
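//
// For example, pathOf("/user/api") returns "/user/" and pathOf("/roles") returns "/".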
func pathOf(s string) string {
if s[len(s)-1] == '/' {
return s
}
parts := strings.Split(s, "/")
if len(parts) > 0 {
parts = parts[:len(parts)-1]
}
ret := strings.Join(parts, "/")
if len(ret) == 0 || ret[len(ret)-1] != '/' {
ret += "/"
}
return ret
}
// navEntry is a single navigation entry, either a file or a directory index, in the Markdown repo.
type navEntry struct {
Depth int
URL string
Name string
}
// navEntrySlice is a utility type for sorting navEntry's.
type navEntrySlice []*navEntry
func (p navEntrySlice) Len() int { return len(p) }
func (p navEntrySlice) Less(i, j int) bool {
if pathOf(p[i].URL) < pathOf(p[j].URL) {
return true
}
if pathOf(p[i].URL) == pathOf(p[j].URL) {
return p[i].URL < p[j].URL
}
return false
}
func (p navEntrySlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
// readTitle reads the first line of a Markdown file and returns it as the title,
// with any leading '#'s stripped. If the file can't be opened then def is returned.
func readTitle(filename, def string) string {
f, err := os.Open(filename)
if err != nil {
sklog.Warningf("Failed to open file %s: %s", filename, err)
return def
}
defer util.Close(f)
reader := bufio.NewReader(f)
title, err := reader.ReadString('\n')
if err != nil {
sklog.Warningf("Failed to read title %s: %s", filename, err)
}
if strings.HasPrefix(title, "#") {
title = markdownHeader.ReplaceAllString(title, "")
}
title = strings.TrimSpace(title)
return title
}