blob: b15231ee3c0578ee6188214ed42e037a96d01fd1 [file] [log] [blame]
package baseline
import (
"bufio"
"fmt"
"io"
"regexp"
"sort"
"strings"
"go.skia.org/infra/go/sklog"
"go.skia.org/infra/golden/go/types"
)
// WIP - Very experimental and probably not working yet. Do not use in production !
// This is a first draft and has not been tested in how it would do against actual merges.
// This implements a serialization format for expectations/baselines that can be merged
// automatically with Git with low probability of merge conflicts.
//
// Expectations are stored in a text file following this structure:
// - Each line contains the expectations for one test.
// - The tokens of each line are separated by exactly one white space.
// - The first token is the test name.
// - Each token following the test name is a labeled digest.
// - Each labeled digest follows this format: <hex_encoded_md5_hash>:<label>, where
// hex_encoded_md5_hash is 32 characters long and label is one of 'u', 'p', 'n'
// (short for 'untriaged', 'positive', 'negative').
// - All digests within a line are sorted in ascending order.
// - All test names within the file are sorted in ascending order.
// - Empty lines and lines starting with '#' are ignored.
//
// Note: Labeling the digests might not be necessary, but we have it here so we don't lose any
// information when serializing -> deserializing.
var (
	// labelToCh gives, for every supported label, the one-character code that is
	// appended to a digest when it is serialized.
	labelToCh = map[types.Label]string{
		types.UNTRIAGED: "u",
		types.POSITIVE:  "p",
		types.NEGATIVE:  "n",
	}

	// chToLabel is the reverse lookup of labelToCh: it turns the one-character code
	// found in a serialized entry back into its label.
	chToLabel = map[string]types.Label{
		"u": types.UNTRIAGED,
		"p": types.POSITIVE,
		"n": types.NEGATIVE,
	}

	// isMD5 matches strings that consist of exactly 32 lower-case hexadecimal
	// characters, i.e. a hex-encoded MD5 hash.
	isMD5 = regexp.MustCompile(`^[0-9a-f]{32}$`)

	// validDigestLabel matches a complete "<digest>:<label>" entry. The first capture
	// group is the digest, the second is the one-character label code.
	validDigestLabel = regexp.MustCompile(`^([0-9a-f]{32}):(u|p|n)$`)
)
// WriteMergeableBaseline writes the given expectations to the provided Writer in a file
// format that should be easy to merge for git.
//
// Every test becomes one line of the form "<test name> <digest>:<label> ...". The entries
// within a line and the lines themselves are sorted, so the output does not depend on map
// iteration order.
//
// All validation happens before the first byte is written. An error is returned unless:
//   - every test name is non-empty, contains no white space and does not start with '#'
//     (otherwise the line would be split into the wrong tokens, or skipped as a comment,
//     when the file is read back);
//   - every test has at least one digest (a line consisting only of a test name is
//     rejected by parseLine);
//   - every digest is a valid hex-encoded MD5 hash (32 lower-case hex characters);
//   - every label has a one-character encoding in labelToCh.
//
// An error is also returned if writing to w fails.
func WriteMergeableBaseline(w io.Writer, baseLine types.TestExp) error {
	allLines := make([]string, 0, len(baseLine))
	for testName, digests := range baseLine {
		if testName == "" {
			return sklog.FmtErrorf("Received emtpy testname.")
		}
		// strings.Fields splits on any white space, so a name without white space
		// comes back as exactly one field that equals the name itself.
		if fields := strings.Fields(testName); len(fields) != 1 || fields[0] != testName {
			return sklog.FmtErrorf("Test name must not contain white space. Got: %q", testName)
		}
		// readLines drops every line whose first character is '#'.
		if strings.HasPrefix(testName, "#") {
			return sklog.FmtErrorf("Test name must not start with '#'. Got: %q", testName)
		}
		if len(digests) == 0 {
			return sklog.FmtErrorf("Test %q needs at least one digest to be serialized.", testName)
		}

		digestList := make([]string, 0, len(digests))
		for d, label := range digests {
			if !isMD5.MatchString(d) {
				return sklog.FmtErrorf("Expected hex-encoded MD5 hash. Got: %q", d)
			}
			// A label without an encoding would be written as "<digest>:", which
			// does not match validDigestLabel when read back.
			if _, ok := labelToCh[label]; !ok {
				return sklog.FmtErrorf("Unknown label %v for digest %q of test %q", label, d, testName)
			}
			digestList = append(digestList, combineDigestLabel(d, label))
		}
		// All digests have the same length and are unique within a test, so sorting
		// the combined "<digest>:<label>" strings also sorts by digest.
		sort.Strings(digestList)

		line := fmt.Sprintf("%s %s", testName, strings.Join(digestList, " "))
		allLines = append(allLines, line)
	}

	// Test names contain no spaces, so sorting whole lines orders them by test name.
	sort.Strings(allLines)
	for _, line := range allLines {
		if _, err := w.Write([]byte(line + "\n")); err != nil {
			return sklog.FmtErrorf("Error writing line to writer: %s", err)
		}
	}
	return nil
}
// ReadMergeableBaseline reads the expectations from the given reader, expecting the file format
// described above.
// It assumes that the given input file can be the result of Git merging two files that were
// previously written via the WriteMergeableBaseline function.
// It checks that the input is consistent with the file format described above:
//   - every line must be accepted by parseLine;
//   - test names must appear in ascending order;
//   - no test name may appear more than once.
//
// An input without any content lines yields an empty, non-nil result. On any error the
// returned expectations are nil.
func ReadMergeableBaseline(r io.Reader) (types.TestExp, error) {
	lines, err := readLines(r)
	if err != nil {
		return nil, sklog.FmtErrorf("Error reading lines: %s", err)
	}
	if len(lines) == 0 {
		return types.TestExp{}, nil
	}

	previousTest, digests, err := parseLine(lines[0])
	if err != nil {
		return nil, sklog.FmtErrorf("Error parsing the first line: %s", err)
	}
	ret := types.TestExp{previousTest: digests}

	for _, line := range lines[1:] {
		testName, digests, err := parseLine(line)
		if err != nil {
			return nil, sklog.FmtErrorf("Error parsing line: %s", err)
		}
		if testName < previousTest {
			return nil, sklog.FmtErrorf("Testnames are not monotonically increasing: %s < %s is false", previousTest, testName)
		}
		if _, ok := ret[testName]; ok {
			return nil, sklog.FmtErrorf("Duplicate testname found: %s", testName)
		}
		ret[testName] = digests
		// Advance the reference name so that every line is compared against its
		// direct predecessor and not only against the first line of the file.
		previousTest = testName
	}
	return ret, nil
}
// readLines reads the content of r as lines.
// Leading and trailing white space (including '\n' and '\r') is removed from every line.
// Lines that are empty after trimming and lines whose first character is '#' are dropped.
// The last line does not need to be terminated by a newline.
//
// Lines may be arbitrarily long: a test with many digests easily exceeds the 64 KiB
// per-line limit of a default bufio.Scanner, so a bufio.Reader is used instead.
//
// It returns a non-nil (possibly empty) slice on success and a nil slice together with
// the error if reading from r fails.
func readLines(r io.Reader) ([]string, error) {
	result := []string{}
	reader := bufio.NewReader(r)
	for {
		raw, err := reader.ReadString('\n')
		if err != nil && err != io.EOF {
			return nil, err
		}
		// ReadString returns the data read so far together with io.EOF, so an
		// unterminated last line is still processed here.
		if line := strings.TrimSpace(raw); line != "" && line[0] != '#' {
			result = append(result, line)
		}
		if err == io.EOF {
			return result, nil
		}
	}
}
// containsEmpty reports whether at least one element of parts is blank, i.e. is the
// empty string or consists solely of white space. A nil or empty slice yields false.
func containsEmpty(parts []string) bool {
	for i := 0; i < len(parts); i++ {
		if trimmed := strings.TrimSpace(parts[i]); len(trimmed) == 0 {
			return true
		}
	}
	return false
}
// parseLine decodes one content line of the file. The line is split on single spaces;
// the first token is the test name and every following token must be a
// "<digest>:<label>" entry.
//
// It returns the test name and the digest -> label mapping of that test. An error (with
// an empty name and a nil map) is returned if a token is blank, if there is no digest
// entry at all, if an entry is malformed, or if the digests are not in strictly
// ascending order.
func parseLine(line string) (string, map[string]types.Label, error) {
	tokens := strings.Split(line, " ")
	// Two consecutive spaces produce an empty token when splitting.
	if containsEmpty(tokens) {
		return "", nil, sklog.FmtErrorf("Tokens in line can only contain one separating space. Multiple found in %q", line)
	}
	// A test name on its own is not a valid entry.
	if len(tokens) < 2 {
		return "", nil, sklog.FmtErrorf("Expectations need to contain at least one image digest. Got: %q", line)
	}

	testName, entries := tokens[0], tokens[1:]
	labels := make(map[string]types.Label, len(entries))
	lastDigest := ""
	for i := range entries {
		digest, label, err := splitDigestLabel(entries[i])
		if err != nil {
			return "", nil, err
		}
		// Requiring strictly ascending digests rejects unsorted input and
		// duplicates with the same comparison.
		if digest <= lastDigest {
			return "", nil, sklog.FmtErrorf("Digests for test %q are not sorted or there are duplicates. Got sequence: %q %q", testName, lastDigest, digest)
		}
		labels[digest] = label
		lastDigest = digest
	}
	return testName, labels, nil
}
// combineDigestLabel builds the serialized "<digest>:<label>" form of an entry, using
// labelToCh to encode the label. It is the counterpart of splitDigestLabel.
func combineDigestLabel(digest string, label types.Label) string {
	ch := labelToCh[label]
	return digest + ":" + ch
}
// splitDigestLabel decodes an entry produced by combineDigestLabel. The entry has to
// match validDigestLabel; on success the digest and the decoded label are returned,
// otherwise an empty digest, types.UNTRIAGED and an error.
func splitDigestLabel(digestLabel string) (string, types.Label, error) {
	groups := validDigestLabel.FindStringSubmatch(digestLabel)
	// A match yields the full match plus the two capture groups.
	if len(groups) == 3 {
		digest, ch := groups[1], groups[2]
		return digest, chToLabel[ch], nil
	}
	return "", types.UNTRIAGED, sklog.FmtErrorf("Invalid digest/label entry: %q", digestLabel)
}