golden/go/baseline/mergeable.go - buildbot - Git at Google

 package baseline

 import (
 	"bufio"
 	"fmt"
 	"io"
 	"regexp"
 	"sort"
 	"strings"

 	"go.skia.org/infra/go/sklog"
 	"go.skia.org/infra/golden/go/types"
 )

 // WIP - Very experimental and probably not working yet. Do not use in production!
 // This is a first draft and has not yet been tested against actual merges.
 // This implements a serialization format for expectations/baselines that can be merged
 // automatically with Git with low probability of merge conflicts.
 //
 // Expectations are stored in a text file following this structure:
 // 	- Each line contains the expectations for one test.
 // 	- The tokens of each line are separated by exactly one white space.
 // 	- The first token is the test name.
 // 	- Every token following the test name is a labeled digest.
 // 	- Each labeled digest follows this format: <hex_encoded_md5_hash>:<label>, where
 // 		hex_encoded_md5_hash is 32 characters long and label is one of 'u', 'p', 'n'
 // 		(short for 'untriaged', 'positive', 'negative').
 // 	- All digests within a line are sorted in ascending order.
 // 	- All test names within the file are sorted in ascending order.
 // 	- Empty lines and lines starting with '#' are ignored.
 //
 // Note: Labeling the digests might not be necessary, but we have it here so we don't lose any
 // information when serializing -> deserializing.

 var (
 	// isMD5 is used to verify that the given string is a hex-encoded MD5 hash: it
 	// matches a complete string of exactly 32 characters from 0-9 and a-f. Uppercase
 	// hex digits do not match.
 	isMD5 = regexp.MustCompile(`^[0-9a-f]{32}$`)

 	// validDigestLabel is used to verify that a labeled digest follows the format
 	// <digest>:<label>. Capture group 1 holds the 32 character digest, capture group 2
 	// holds the one-character label ('u', 'p' or 'n').
 	validDigestLabel = regexp.MustCompile(`^([0-9a-f]{32}):(u|p|n)$`)

 	// labelToCh maps a label to the character that represents it in the file.
 	labelToCh = map[types.Label]string{
 		types.UNTRIAGED: "u",
 		types.POSITIVE:  "p",
 		types.NEGATIVE:  "n",
 	}

 	// chToLabel maps a character found in the file back to its label. It holds the
 	// same pairs as labelToCh with keys and values swapped.
 	chToLabel = map[string]types.Label{
 		"u": types.UNTRIAGED,
 		"p": types.POSITIVE,
 		"n": types.NEGATIVE,
 	}
 )

 // WriteMergeableBaseline writes the given expectations to the provided Writer in a file
 // format that should be easy to merge for git: one line per test, lines ordered by test
 // name and the labeled digests within each line in ascending order.
 //
 // The input is checked against these conditions before anything is written:
 //    - No empty test names are allowed.
 //    - Every test must have at least one digest. A line without digests would be
 //      rejected by parseLine when the file is read back.
 //    - All digests must be valid hex-encoded MD5 hashes (32 characters).
 //
 // An error is returned if one of the conditions is violated or if writing to w fails. In
 // the latter case w may already have received some of the lines.
 func WriteMergeableBaseline(w io.Writer, baseLine types.TestExp) error {
 	// Order by test name, not by the finished lines: the reader compares test names, and
 	// the two orders can differ when a name contains a character that sorts before ' '.
 	testNames := make([]string, 0, len(baseLine))
 	for testName := range baseLine {
 		if testName == "" {
 			return sklog.FmtErrorf("Received empty testname.")
 		}
 		testNames = append(testNames, testName)
 	}
 	sort.Strings(testNames)

 	allLines := make([]string, 0, len(testNames))
 	for _, testName := range testNames {
 		digests := baseLine[testName]
 		if len(digests) == 0 {
 			return sklog.FmtErrorf("Test %q has no digests. At least one is required.", testName)
 		}

 		digestList := make([]string, 0, len(digests))
 		for d, label := range digests {
 			if !isMD5.MatchString(d) {
 				return sklog.FmtErrorf("Expected hex-encoded MD5 hash. Got: %q", d)
 			}
 			digestList = append(digestList, combineDigestLabel(d, label))
 		}
 		// All digests have the same length, so sorting the combined tokens orders them
 		// by digest.
 		sort.Strings(digestList)
 		allLines = append(allLines, fmt.Sprintf("%s %s", testName, strings.Join(digestList, " ")))
 	}

 	for _, line := range allLines {
 		if _, err := io.WriteString(w, line+"\n"); err != nil {
 			return sklog.FmtErrorf("Error writing line to writer: %s", err)
 		}
 	}
 	return nil
 }

 // ReadMergeableBaseline reads the expectations from the given reader, expecting the file format
 // described above.
 // It assumes that the given input file can be the result of Git merging two files that were
 // previously written via the WriteMergeableBaseline function.
 // It checks that the input is consistent with the file format described above: every line
 // must parse, test names must appear in ascending order and no test name may appear twice.
 // Input without any content lines yields an empty, non-nil TestExp.
 func ReadMergeableBaseline(r io.Reader) (types.TestExp, error) {
 	lines, err := readLines(r)
 	if err != nil {
 		return nil, sklog.FmtErrorf("Error reading lines: %s", err)
 	}

 	if len(lines) == 0 {
 		return types.TestExp{}, nil
 	}

 	previousTest, digests, err := parseLine(lines[0])
 	if err != nil {
 		return nil, sklog.FmtErrorf("Error parsing the first line: %s", err)
 	}

 	ret := types.TestExp{previousTest: digests}

 	for _, line := range lines[1:] {
 		testName, digests, err := parseLine(line)
 		if err != nil {
 			return nil, sklog.FmtErrorf("Error parsing line: %s", err)
 		}

 		if testName < previousTest {
 			return nil, sklog.FmtErrorf("Testnames are not monotonically increasing: %s < %s is false", previousTest, testName)
 		}

 		// Equal names pass the ordering check above and are reported here instead.
 		if _, ok := ret[testName]; ok {
 			return nil, sklog.FmtErrorf("Duplicate testname found: %s", testName)
 		}

 		ret[testName] = digests
 		// Advance the reference name so that each line is compared with its direct
 		// predecessor and not only with the first line of the file.
 		previousTest = testName
 	}

 	return ret, nil
 }

 // readLines reads the content of r as lines.
 // Every line is stripped of leading and trailing white space (which includes the line
 // terminator). Lines that are empty after stripping and lines whose first remaining
 // character is '#' are dropped.
 // Lines may be arbitrarily long; a test with many digests easily exceeds the 64 KiB token
 // limit of a default bufio.Scanner, so a bufio.Reader is used instead.
 // It returns a non-nil (possibly empty) slice on success and nil plus the error if
 // reading from r fails.
 func readLines(r io.Reader) ([]string, error) {
 	result := []string{}
 	reader := bufio.NewReader(r)
 	for {
 		raw, err := reader.ReadString('\n')
 		if err != nil && err != io.EOF {
 			return nil, err
 		}

 		// On io.EOF raw holds the final line if it was not terminated by '\n'.
 		if line := strings.TrimSpace(raw); line != "" && line[0] != '#' {
 			result = append(result, line)
 		}

 		if err == io.EOF {
 			return result, nil
 		}
 	}
 }

 // containsEmpty reports whether at least one element of parts is the empty string or
 // consists solely of white space. It returns false for an empty or nil slice.
 func containsEmpty(parts []string) bool {
 	for i := range parts {
 		if len(strings.TrimSpace(parts[i])) == 0 {
 			return true
 		}
 	}
 	return false
 }

 // parseLine turns one line of the file into the test name and the labeled digests of
 // that test. The returned map (digest -> label) can be added to a baseline directly.
 // An error is returned if the tokens are not separated by exactly one space, if no digest
 // follows the test name, if a token is not a valid labeled digest or if the digests are
 // not in strictly ascending order.
 func parseLine(line string) (string, map[string]types.Label, error) {
 	tokens := strings.Split(line, " ")
 	switch {
 	case containsEmpty(tokens):
 		// Splitting on a single space yields an empty token for every surplus space.
 		return "", nil, sklog.FmtErrorf("Tokens in line can only contain one separating space. Multiple found in %q", line)
 	case len(tokens) < 2:
 		// A test name on its own carries no expectations.
 		return "", nil, sklog.FmtErrorf("Expectations need to contain at least one image digest. Got: %q", line)
 	}

 	name, entries := tokens[0], tokens[1:]
 	labeled := make(map[string]types.Label, len(entries))
 	last := ""
 	for _, entry := range entries {
 		digest, label, err := splitDigestLabel(entry)
 		if err != nil {
 			return "", nil, err
 		}

 		// Requiring each digest to be strictly greater than its predecessor rejects
 		// unsorted input as well as repeated digests.
 		if last >= digest {
 			return "", nil, sklog.FmtErrorf("Digests for test %q are not sorted or there are duplicates. Got sequence: %q %q", name, last, digest)
 		}

 		labeled[digest] = label
 		last = digest
 	}
 	return name, labeled, nil
 }

 // combineDigestLabel builds the token "<digest>:<label character>" for the given pair. It
 // is the 'inverse' of splitDigestLabel. The label character is looked up in labelToCh; a
 // label without an entry there results in an empty label part.
 func combineDigestLabel(digest string, label types.Label) string {
 	ch := labelToCh[label]
 	return fmt.Sprintf("%s:%s", digest, ch)
 }

 // splitDigestLabel takes a token produced by combineDigestLabel apart and returns the
 // digest and its label. If the token does not match validDigestLabel an error is
 // returned together with an empty digest and types.UNTRIAGED.
 func splitDigestLabel(digestLabel string) (string, types.Label, error) {
 	// A successful match consists of the whole match followed by the two capture groups.
 	if groups := validDigestLabel.FindStringSubmatch(digestLabel); len(groups) == 3 {
 		return groups[1], chToLabel[groups[2]], nil
 	}
 	return "", types.UNTRIAGED, sklog.FmtErrorf("Invalid digest/label entry: %q", digestLabel)
 }
	package baseline

	import (
	"bufio"
	"fmt"
	"io"
	"regexp"
	"sort"
	"strings"

	"go.skia.org/infra/go/sklog"
	"go.skia.org/infra/golden/go/types"
	)

	// WIP - Very experimental and probably not working yet. Do not use in production!
	// This is a first draft and has not yet been tested against actual merges.
	// This implements a serialization format for expectations/baselines that can be merged
	// automatically with Git with low probability of merge conflicts.
	//
	// Expectations are stored in a text file following this structure:
	// - Each line contains the expectations for one test.
	// - The tokens of each line are separated by exactly one white space.
	// - The first token is the test name.
	// - Every token following the test name is a labeled digest.
	// - Each labeled digest follows this format: <hex_encoded_md5_hash>:<label>, where
	//   hex_encoded_md5_hash is 32 characters long and label is one of 'u', 'p', 'n'
	//   (short for 'untriaged', 'positive', 'negative').
	// - All digests within a line are sorted in ascending order.
	// - All test names within the file are sorted in ascending order.
	// - Empty lines and lines starting with '#' are ignored.
	//
	// Note: Labeling the digests might not be necessary, but we have it here so we don't lose any
	// information when serializing -> deserializing.

	var (
		// isMD5 is used to verify that the given string is a hex-encoded MD5 hash: it
		// matches a complete string of exactly 32 characters from 0-9 and a-f.
		isMD5 = regexp.MustCompile(`^[0-9a-f]{32}$`)

		// validDigestLabel is used to verify that a labeled digest follows the format
		// <digest>:<label>. Capture group 1 holds the digest, capture group 2 the label.
		// The '|' characters must not be escaped: an escaped pipe is a literal character
		// and the expression would then only accept the text "u|p|n" as label.
		validDigestLabel = regexp.MustCompile(`^([0-9a-f]{32}):(u|p|n)$`)

		// labelToCh maps a label to the character that represents it in the file.
		labelToCh = map[types.Label]string{
			types.UNTRIAGED: "u",
			types.POSITIVE:  "p",
			types.NEGATIVE:  "n",
		}

		// chToLabel maps a character found in the file back to its label.
		chToLabel = map[string]types.Label{
			"u": types.UNTRIAGED,
			"p": types.POSITIVE,
			"n": types.NEGATIVE,
		}
	)

	// WriteMergeableBaseline writes the given expectations to the provided Writer in a file
	// format that should be easy to merge for git: one line per test, lines ordered by test
	// name and the labeled digests within each line in ascending order.
	//
	// The input is checked against these conditions before anything is written:
	// - No empty test names are allowed.
	// - Every test must have at least one digest. A line without digests would be
	//   rejected by parseLine when the file is read back.
	// - All digests must be valid hex-encoded MD5 hashes (32 characters).
	//
	// An error is returned if one of the conditions is violated or if writing to w fails. In
	// the latter case w may already have received some of the lines.
	func WriteMergeableBaseline(w io.Writer, baseLine types.TestExp) error {
		// Order by test name, not by the finished lines: the reader compares test names, and
		// the two orders can differ when a name contains a character that sorts before ' '.
		testNames := make([]string, 0, len(baseLine))
		for testName := range baseLine {
			if testName == "" {
				return sklog.FmtErrorf("Received empty testname.")
			}
			testNames = append(testNames, testName)
		}
		sort.Strings(testNames)

		allLines := make([]string, 0, len(testNames))
		for _, testName := range testNames {
			digests := baseLine[testName]
			if len(digests) == 0 {
				return sklog.FmtErrorf("Test %q has no digests. At least one is required.", testName)
			}

			digestList := make([]string, 0, len(digests))
			for d, label := range digests {
				if !isMD5.MatchString(d) {
					return sklog.FmtErrorf("Expected hex-encoded MD5 hash. Got: %q", d)
				}
				digestList = append(digestList, combineDigestLabel(d, label))
			}
			// All digests have the same length, so sorting the combined tokens orders them
			// by digest.
			sort.Strings(digestList)
			allLines = append(allLines, fmt.Sprintf("%s %s", testName, strings.Join(digestList, " ")))
		}

		for _, line := range allLines {
			if _, err := io.WriteString(w, line+"\n"); err != nil {
				return sklog.FmtErrorf("Error writing line to writer: %s", err)
			}
		}
		return nil
	}

	// ReadMergeableBaseline reads the expectations from the given reader, expecting the file format
	// described above.
	// It assumes that the given input file can be the result of Git merging two files that were
	// previously written via the WriteMergeableBaseline function.
	// It checks that the input is consistent with the file format described above: every line
	// must parse, test names must appear in ascending order and no test name may appear twice.
	// Input without any content lines yields an empty, non-nil TestExp.
	func ReadMergeableBaseline(r io.Reader) (types.TestExp, error) {
		lines, err := readLines(r)
		if err != nil {
			return nil, sklog.FmtErrorf("Error reading lines: %s", err)
		}

		if len(lines) == 0 {
			return types.TestExp{}, nil
		}

		previousTest, digests, err := parseLine(lines[0])
		if err != nil {
			return nil, sklog.FmtErrorf("Error parsing the first line: %s", err)
		}

		ret := types.TestExp{previousTest: digests}

		for _, line := range lines[1:] {
			testName, digests, err := parseLine(line)
			if err != nil {
				return nil, sklog.FmtErrorf("Error parsing line: %s", err)
			}

			if testName < previousTest {
				return nil, sklog.FmtErrorf("Testnames are not monotonically increasing: %s < %s is false", previousTest, testName)
			}

			// Equal names pass the ordering check above and are reported here instead.
			if _, ok := ret[testName]; ok {
				return nil, sklog.FmtErrorf("Duplicate testname found: %s", testName)
			}

			ret[testName] = digests
			// Advance the reference name so that each line is compared with its direct
			// predecessor and not only with the first line of the file.
			previousTest = testName
		}

		return ret, nil
	}

	// readLines reads the content of r as lines.
	// Every line is stripped of leading and trailing white space (which includes the line
	// terminator). Lines that are empty after stripping and lines whose first remaining
	// character is '#' are dropped.
	// Lines may be arbitrarily long; a test with many digests easily exceeds the 64 KiB token
	// limit of a default bufio.Scanner, so a bufio.Reader is used instead.
	// It returns a non-nil (possibly empty) slice on success and nil plus the error if
	// reading from r fails.
	func readLines(r io.Reader) ([]string, error) {
		result := []string{}
		reader := bufio.NewReader(r)
		for {
			raw, err := reader.ReadString('\n')
			if err != nil && err != io.EOF {
				return nil, err
			}

			// On io.EOF raw holds the final line if it was not terminated by '\n'.
			if line := strings.TrimSpace(raw); line != "" && line[0] != '#' {
				result = append(result, line)
			}

			if err == io.EOF {
				return result, nil
			}
		}
	}

	// containsEmpty reports whether at least one element of parts is the empty string or
	// consists solely of white space. It returns false for an empty or nil slice.
	func containsEmpty(parts []string) bool {
		for i := range parts {
			if len(strings.TrimSpace(parts[i])) == 0 {
				return true
			}
		}
		return false
	}

	// parseLine turns one line of the file into the test name and the labeled digests of
	// that test. The returned map (digest -> label) can be added to a baseline directly.
	// An error is returned if the tokens are not separated by exactly one space, if no digest
	// follows the test name, if a token is not a valid labeled digest or if the digests are
	// not in strictly ascending order.
	func parseLine(line string) (string, map[string]types.Label, error) {
		tokens := strings.Split(line, " ")
		switch {
		case containsEmpty(tokens):
			// Splitting on a single space yields an empty token for every surplus space.
			return "", nil, sklog.FmtErrorf("Tokens in line can only contain one separating space. Multiple found in %q", line)
		case len(tokens) < 2:
			// A test name on its own carries no expectations.
			return "", nil, sklog.FmtErrorf("Expectations need to contain at least one image digest. Got: %q", line)
		}

		name, entries := tokens[0], tokens[1:]
		labeled := make(map[string]types.Label, len(entries))
		last := ""
		for _, entry := range entries {
			digest, label, err := splitDigestLabel(entry)
			if err != nil {
				return "", nil, err
			}

			// Requiring each digest to be strictly greater than its predecessor rejects
			// unsorted input as well as repeated digests.
			if last >= digest {
				return "", nil, sklog.FmtErrorf("Digests for test %q are not sorted or there are duplicates. Got sequence: %q %q", name, last, digest)
			}

			labeled[digest] = label
			last = digest
		}
		return name, labeled, nil
	}

	// combineDigestLabel builds the token "<digest>:<label character>" for the given pair. It
	// is the 'inverse' of splitDigestLabel. The label character is looked up in labelToCh; a
	// label without an entry there results in an empty label part.
	func combineDigestLabel(digest string, label types.Label) string {
		ch := labelToCh[label]
		return fmt.Sprintf("%s:%s", digest, ch)
	}

	// splitDigestLabel takes a token produced by combineDigestLabel apart and returns the
	// digest and its label. If the token does not match validDigestLabel an error is
	// returned together with an empty digest and types.UNTRIAGED.
	func splitDigestLabel(digestLabel string) (string, types.Label, error) {
		// A successful match consists of the whole match followed by the two capture groups.
		if groups := validDigestLabel.FindStringSubmatch(digestLabel); len(groups) == 3 {
			return groups[1], chToLabel[groups[2]], nil
		}
		return "", types.UNTRIAGED, sklog.FmtErrorf("Invalid digest/label entry: %q", digestLabel)
	}