blob: 3ec0a27003a5c01fd00a6c84321e04d3875cbf57 [file] [log] [blame]
package sql_test
import (
"context"
"strings"
"testing"
"github.com/stretchr/testify/require"
"go.skia.org/infra/go/paramtools"
"go.skia.org/infra/golden/go/sql"
"go.skia.org/infra/golden/go/sql/schema"
"go.skia.org/infra/golden/go/sql/sqltest"
"go.skia.org/infra/golden/go/types"
)
// The following benchmarks measure two different techniques for searching Traces by the keys.
// For each test, the "faster" technique (at least when running locally on my dev machine)
// is listed first.
// Locally, this ran at about 30ms per operation for 36400 Traces. It can use the indexes and
// appears to do so in parallel.
func BenchmarkUnionIntersect_AllKeysHaveMultipleValues(b *testing.B) {
ctx := context.Background()
db := sqltest.NewCockroachDBForTestsWithProductionSchema(ctx, b)
traces := makeTestTraceRows()
require.NoError(b, sqltest.BulkInsertDataTables(ctx, db, schema.Tables{
Traces: traces,
}))
b.ResetTimer()
count := 0
for i := 0; i < b.N; i++ {
row := db.QueryRow(ctx, `
WITH
Fruit AS (
SELECT trace_id FROM Traces WHERE keys -> 'fruit' = '"apple"'
UNION
SELECT trace_id FROM Traces WHERE keys -> 'fruit' = '"fig"'
),
Callsign AS (
SELECT trace_id FROM Traces WHERE keys -> 'callsign' = '"foxtrot"'
UNION
SELECT trace_id FROM Traces WHERE keys -> 'callsign' = '"tango"'
UNION
SELECT trace_id FROM Traces WHERE keys -> 'callsign' = '"alfa"'
UNION
SELECT trace_id FROM Traces WHERE keys -> 'callsign' = '"does not exist"'
UNION
SELECT trace_id FROM Traces WHERE keys -> 'callsign' = '"charlie"'
),
Province AS (
SELECT trace_id FROM Traces WHERE keys -> 'province' = '"Ontario"'
UNION
SELECT trace_id FROM Traces WHERE keys -> 'province' = '"Something"'
)
SELECT COUNT(*) FROM (
SELECT * FROM Fruit
INTERSECT
SELECT * FROM Callsign
INTERSECT
SELECT * FROM Province) x;`)
_ = row.Scan(&count)
if count != 2*4*2*1 {
panic("wrong")
}
}
}
// Locally, this ran at about 100ms per operation for 36400 Traces. It has to do a full table scan.
func BenchmarkAndIN_AllKeysHaveMultipleValues(b *testing.B) {
ctx := context.Background()
db := sqltest.NewCockroachDBForTestsWithProductionSchema(ctx, b)
traces := makeTestTraceRows()
require.NoError(b, sqltest.BulkInsertDataTables(ctx, db, schema.Tables{
Traces: traces,
}))
b.ResetTimer()
count := 0
for i := 0; i < b.N; i++ {
row := db.QueryRow(ctx, `
SELECT count(trace_id) FROM Traces WHERE keys -> 'fruit' IN('"apple"','"fig"')
AND keys -> 'callsign' IN('"foxtrot"','"tango"', '"alfa"', '"does not exist"', '"charlie"')
AND keys -> 'province' IN('"Ontario"','"Something"');`)
_ = row.Scan(&count)
if count != 2*4*1*2 {
panic("wrong")
}
}
}
// Locally, this ran at about 30ms per operation for 36400 Traces. It can use the indexes and
// appears to do so in parallel.
func BenchmarkUnionIntersect_SomeKeysHaveASingleValue(b *testing.B) {
ctx := context.Background()
db := sqltest.NewCockroachDBForTestsWithProductionSchema(ctx, b)
traces := makeTestTraceRows()
require.NoError(b, sqltest.BulkInsertDataTables(ctx, db, schema.Tables{
Traces: traces,
}))
b.ResetTimer()
count := 0
for i := 0; i < b.N; i++ {
row := db.QueryRow(ctx, `
WITH
Callsign AS (
SELECT trace_id FROM Traces WHERE keys -> 'callsign' = '"foxtrot"'
UNION
SELECT trace_id FROM Traces WHERE keys -> 'callsign' = '"tango"'
UNION
SELECT trace_id FROM Traces WHERE keys -> 'callsign' = '"alfa"'
UNION
SELECT trace_id FROM Traces WHERE keys -> 'callsign' = '"does not exist"'
UNION
SELECT trace_id FROM Traces WHERE keys -> 'callsign' = '"charlie"'
)
SELECT COUNT(*) FROM (
SELECT trace_id FROM Traces WHERE keys -> 'fruit' = '"apple"'
INTERSECT
SELECT * FROM Callsign
INTERSECT
SELECT trace_id FROM Traces WHERE keys -> 'province' = '"Ontario"') x;`)
_ = row.Scan(&count)
if count != 1*4*1*2 {
panic("wrong")
}
}
}
// Locally, this ran at about 80ms per operation for 36400 Traces. It did a zig-zag join on the
// two single item keys and then a full search over that.
func BenchmarkAndIN_SomeKeysHaveASingleValue(b *testing.B) {
ctx := context.Background()
db := sqltest.NewCockroachDBForTestsWithProductionSchema(ctx, b)
traces := makeTestTraceRows()
require.NoError(b, sqltest.BulkInsertDataTables(ctx, db, schema.Tables{
Traces: traces,
}))
b.ResetTimer()
count := 0
for i := 0; i < b.N; i++ {
row := db.QueryRow(ctx, `
SELECT count(trace_id) FROM Traces WHERE keys -> 'fruit' = '"apple"'
AND keys -> 'callsign' IN('"foxtrot"','"tango"', '"alfa"', '"does not exist"', '"charlie"')
AND keys -> 'province' = '"Ontario"';`)
_ = row.Scan(&count)
if count != 1*4*1*2 {
panic("wrong")
}
}
}
// Locally, this ran at about 10ms per operation for 36400 Traces. It can use the indexes and
// appears to do so in parallel.
func BenchmarkUnionIntersect_AllKeysHaveASingleValue(b *testing.B) {
ctx := context.Background()
db := sqltest.NewCockroachDBForTestsWithProductionSchema(ctx, b)
traces := makeTestTraceRows()
require.NoError(b, sqltest.BulkInsertDataTables(ctx, db, schema.Tables{
Traces: traces,
}))
b.ResetTimer()
count := 0
for i := 0; i < b.N; i++ {
row := db.QueryRow(ctx, `
SELECT COUNT(*) FROM (
SELECT trace_id FROM Traces WHERE keys -> 'fruit' = '"apple"'
INTERSECT
SELECT trace_id FROM Traces WHERE keys -> 'callsign' = '"foxtrot"'
INTERSECT
SELECT trace_id FROM Traces WHERE keys -> 'province' = '"Ontario"') x;`)
_ = row.Scan(&count)
if count != 1*1*1*2 {
panic("wrong")
}
}
}
// Locally, this ran at about 70ms per operation for 36400 Traces. It did a zig-zag join on the
// keys and then a lookup join on that.
func BenchmarkAndIN_AllKeysHaveASingleValue(b *testing.B) {
ctx := context.Background()
db := sqltest.NewCockroachDBForTestsWithProductionSchema(ctx, b)
traces := makeTestTraceRows()
require.NoError(b, sqltest.BulkInsertDataTables(ctx, db, schema.Tables{
Traces: traces,
}))
b.ResetTimer()
count := 0
for i := 0; i < b.N; i++ {
row := db.QueryRow(ctx, `
SELECT count(trace_id) FROM Traces WHERE keys -> 'fruit' = '"apple"'
AND keys -> 'callsign' = '"foxtrot"'
AND keys -> 'province' = '"Ontario"';`)
_ = row.Scan(&count)
if count != 1*1*1*2 {
panic("wrong")
}
}
}
// Locally, this ran at about 5ms per operation for 36400 Traces. There were only 260 traces
// with number = "three" (this value was hand-selected for the reason of having the fewest number
// of traces in an attempt to find a case where AndIN was faster). The optimizer looked up that
// index and then joined it with the index of callsign.
func BenchmarkAndIN_IndexJoinPossible(b *testing.B) {
ctx := context.Background()
db := sqltest.NewCockroachDBForTestsWithProductionSchema(ctx, b)
traces := makeTestTraceRows()
require.NoError(b, sqltest.BulkInsertDataTables(ctx, db, schema.Tables{
Traces: traces,
}))
b.ResetTimer()
count := 0
for i := 0; i < b.N; i++ {
row := db.QueryRow(ctx, `
SELECT count(trace_id) FROM Traces WHERE keys -> 'number' = '"three"'
AND keys -> 'callsign' IN('"foxtrot"','"tango"', '"alfa"', '"does not exist"', '"charlie"');`)
_ = row.Scan(&count)
if count != 40 {
panic("wrong")
}
}
}
// Locally, this ran at about 20ms per operation for 36400 Traces. It is the same query as
// BenchmarkAndIN_IndexJoinPossible and shows how sometimes, UnionIntersect can be slower because
// it can sometimes have to fetch more data (e.g. a lot of rows based on callsign) just to throw
// nearly all of them away (because only a few of them have number = "three").
func BenchmarkUnionIntersect_IndexJoinComparison(b *testing.B) {
ctx := context.Background()
db := sqltest.NewCockroachDBForTestsWithProductionSchema(ctx, b)
traces := makeTestTraceRows()
require.NoError(b, sqltest.BulkInsertDataTables(ctx, db, schema.Tables{
Traces: traces,
}))
b.ResetTimer()
count := 0
for i := 0; i < b.N; i++ {
row := db.QueryRow(ctx, `
WITH
Callsign AS (
SELECT trace_id FROM Traces WHERE keys -> 'callsign' = '"foxtrot"'
UNION
SELECT trace_id FROM Traces WHERE keys -> 'callsign' = '"tango"'
UNION
SELECT trace_id FROM Traces WHERE keys -> 'callsign' = '"alfa"'
UNION
SELECT trace_id FROM Traces WHERE keys -> 'callsign' = '"does not exist"'
UNION
SELECT trace_id FROM Traces WHERE keys -> 'callsign' = '"charlie"'
)
SELECT COUNT(*) FROM (
SELECT trace_id FROM Traces WHERE keys -> 'number' = '"three"'
INTERSECT
SELECT * FROM Callsign) x;`)
_ = row.Scan(&count)
if count != 40 {
panic("wrong")
}
}
}
// Locally, this ran at about 30ms per operation for 36400 Traces. It is the same query as
// BenchmarkAndIN_IndexJoinNotAlwaysFast and shows that even when using an index join, the AndIN
// case is not consistently faster.
func BenchmarkUnionIntersect_IndexJoinNotAlwaysFastComparison(b *testing.B) {
ctx := context.Background()
db := sqltest.NewCockroachDBForTestsWithProductionSchema(ctx, b)
traces := makeTestTraceRows()
require.NoError(b, sqltest.BulkInsertDataTables(ctx, db, schema.Tables{
Traces: traces,
}))
b.ResetTimer()
count := 0
for i := 0; i < b.N; i++ {
row := db.QueryRow(ctx, `
WITH
Callsign AS (
SELECT trace_id FROM Traces WHERE keys -> 'callsign' = '"foxtrot"'
UNION
SELECT trace_id FROM Traces WHERE keys -> 'callsign' = '"tango"'
UNION
SELECT trace_id FROM Traces WHERE keys -> 'callsign' = '"alfa"'
UNION
SELECT trace_id FROM Traces WHERE keys -> 'callsign' = '"does not exist"'
UNION
SELECT trace_id FROM Traces WHERE keys -> 'callsign' = '"charlie"'
)
SELECT COUNT(*) FROM (
SELECT trace_id FROM Traces WHERE keys -> 'number' = '"eight"'
INTERSECT
SELECT * FROM Callsign) x;`)
_ = row.Scan(&count)
if count != 440 {
panic("wrong")
}
}
}
// Locally, this ran at about 30ms per operation for 36400 Traces. There were 2860 traces
// with number = "eight", a contrasting example to the other IndexJoin benchmark. It was carried
// out the same way, but was much slower.
func BenchmarkAndIN_IndexJoinNotAlwaysFast(b *testing.B) {
ctx := context.Background()
db := sqltest.NewCockroachDBForTestsWithProductionSchema(ctx, b)
traces := makeTestTraceRows()
require.NoError(b, sqltest.BulkInsertDataTables(ctx, db, schema.Tables{
Traces: traces,
}))
b.ResetTimer()
count := 0
for i := 0; i < b.N; i++ {
row := db.QueryRow(ctx, `
SELECT count(trace_id) FROM Traces WHERE keys -> 'number' = '"eight"'
AND keys -> 'callsign' IN('"foxtrot"','"tango"', '"alfa"', '"does not exist"', '"charlie"');`)
_ = row.Scan(&count)
if count != 440 {
panic("wrong")
}
}
}
const (
fruitKey = "fruit"
callsignKey = "callsign"
provinceKey = "province"
numberKey = "number"
letterKey = "letter"
)
// makeTestTraceRows makes many traces filled with keys of arbitrary data that is meant to
// be somewhat representative of the keys in Gold. All of the traces have certain keys
// (e.g. fruit, callsign, province), but only some of others (e.g. number, letter). Just like
// Gold, some keys have more distinct values than others.
func makeTestTraceRows() []schema.TraceRow {
// https://en.wikibooks.org/wiki/Wikijunior:Fruit_Alphabet
fruits := []string{"apple", "apricots", "avocados", "banana", "boysenberry", "blueberry",
"cherry", "cantaloupe", "clementine", "date", "dewberry", "dragon fruit", "elderberry",
"eggfruit", "evergreen huckleberry", "entawak", "fig", "farkleberry, finger lime",
"grapefruit", "grapes", "gooseberries", "guava", "honeydew", "hackberry", "imbe",
"jackfruit", "jambolan", "kiwi", "kumquat", "lime", "lemon", "longan", "lychee", "loquat",
"mango", "mulberry", "melon", "nectarine", "olive", "orange", "papaya", "persimmon",
"paw paw", "prickly pear", "peach", "pomegranate", "pineapple", "passion fruit",
"quince", "quararibea cordata", "rambutan", "raspberry", "rose hip", "star fruit",
"strawberry", "tomato", "tangerine", "tamarind", "ugli fruit", "uniq fruit", "ugni",
"vanilla bean", "voavanga", "watermelon", "wolfberry", "xigua", "ximenia", "xango",
"yangmei", "zig zag vine fruit",
}
// https://en.wikipedia.org/wiki/NATO_phonetic_alphabet
callsign := []string{"alfa", "bravo", "charlie", "delta", "echo", "foxtrot", "golf", "hotel",
"india", "juliett", "kilo", "lima", "mike", "november", "oscar", "papa", "quebec", "romeo",
"sierra", "tango", "uniform", "victor", "whiskey", "x-ray", "yankee", "zulu"}
// https://en.wikipedia.org/wiki/Provinces_and_territories_of_Canada
province := []string{"Alberta", "British Columbia", "Manitoba", "New Brunswick",
"Newfoundland and Labrador", "Nova Scotia", "Ontario", "Prince Edward Island", "Quebec",
"Saskatchewan"}
numbers := []string{"zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
"nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen",
"seventeen", "eighteen", "nineteen", "many"}
var rv []schema.TraceRow
addTraceRow := func(p paramtools.Params) {
_, traceID := sql.SerializeMap(p)
_, groupingID := sql.SerializeMap(paramtools.Params{
types.CorpusField: "a_corpus",
fruitKey: p[fruitKey],
})
rv = append(rv, schema.TraceRow{
TraceID: traceID,
Corpus: "a_corpus",
GroupingID: groupingID,
Keys: p,
MatchesAnyIgnoreRule: schema.NBFalse,
})
}
for _, f := range fruits {
for _, c := range callsign {
for _, p := range province {
idx := len(f)
if idx >= len(numbers) {
idx = len(numbers) - 1
}
addTraceRow(paramtools.Params{
types.CorpusField: "a_corpus",
fruitKey: f,
callsignKey: c,
provinceKey: p,
numberKey: numbers[idx],
})
addTraceRow(paramtools.Params{
types.CorpusField: "a_corpus",
fruitKey: f,
callsignKey: c,
provinceKey: p,
letterKey: strings.Repeat(string(f[0]), 6),
})
}
}
}
return rv
}