blob: 22944b56e47fdc854656257d93eff6bc3863ddc7 [file] [log] [blame]
package regression
import (
"math"
"sort"
"go.skia.org/infra/go/sklog"
"go.skia.org/infra/go/vec32"
"go.skia.org/infra/perf/go/clustering2"
"go.skia.org/infra/perf/go/config"
"go.skia.org/infra/perf/go/dataframe"
"go.skia.org/infra/perf/go/stepfit"
)
// FilterTail returns the last value of trace if it falls outside the range
// established by the earlier values, and 0 otherwise. Please see
// go/calmbench-trace-tail for how we filter the tail.
//
// The earlier values (everything but the last element, with missing data
// dropped) are sorted. The lower bound is the value at index
// floor((n-1)*quantile) and the upper bound is the value at the mirrored index
// counted from the top; both bounds are then widened so that they always
// include 0. The tail is returned when it is greater than
// upperBound*multiplier+slack or less than lowerBound*multiplier-slack.
//
// 0 is returned when trace is empty, when the tail itself is missing data, or
// when there are no earlier non-missing values to compare against. quantile is
// clamped to [0, 1] (NaN is treated as 0) so the computed index always lands
// inside the sorted history.
func FilterTail(trace []float32, quantile float64, multiplier float64, slack float64) float32 {
	if len(trace) == 0 {
		return 0
	}
	tail := trace[len(trace)-1]
	if tail == vec32.MISSING_DATA_SENTINEL {
		return 0
	}

	// Collect every non-missing value that precedes the tail.
	history := make([]float64, 0, len(trace)-1)
	for _, v := range trace[:len(trace)-1] {
		if v != vec32.MISSING_DATA_SENTINEL {
			history = append(history, float64(v))
		}
	}
	n := len(history)
	if n == 0 {
		// Nothing to compare the tail against; indexing below would panic.
		return 0
	}
	sort.Float64s(history)

	// Keep the index p within [0, n-1] regardless of what the caller passed.
	switch {
	case math.IsNaN(quantile) || quantile < 0:
		quantile = 0
	case quantile > 1:
		quantile = 1
	}
	p := int(math.Floor(float64(n-1) * quantile))

	lowerBound := math.Min(0, history[p])
	upperBound := math.Max(0, history[n-1-p])

	tail64 := float64(tail)
	if tail64 > upperBound*multiplier+slack || tail64 < lowerBound*multiplier-slack {
		return tail
	}
	return 0
}
// Tail finds regressions in calmbench data.
//
// Every trace in df.TraceSet is run through FilterTail; a trace whose filtered
// tail is negative is counted in the low cluster and one whose filtered tail is
// positive is counted in the high cluster. The stepfit result is still computed
// for each trace and attached to the cluster, but its Status is not used to
// classify the trace: the sign of the filtered tail decides that, and the
// turning point is always the last point of the trace.
//
// Parameters:
//   - df: the traces to inspect, along with the header that supplies the step
//     point for each cluster.
//   - k: scales the slack passed to FilterTail (slack = k * 0.01) and is
//     reported back in the returned summaries.
//   - stddevThreshold: passed to vec32.Norm when normalizing each trace and
//     reported back in the returned summaries.
//   - progress: not referenced by this function.
//   - interesting: passed to stepfit.GetStepFitAtMid; its reciprocal is the
//     quantile passed to FilterTail.
//
// The returned summaries contain the low and/or high cluster only when at
// least one trace landed in it. The returned error is always nil.
func Tail(df *dataframe.DataFrame, k int, stddevThreshold float32, progress clustering2.Progress, interesting float32) (*clustering2.ClusterSummaries, error) {
	low := clustering2.NewClusterSummary()
	high := clustering2.NewClusterSummary()

	// The FilterTail settings do not depend on the trace, so compute them once.
	quantile := 1 / float64(interesting)
	slack := float64(k) * 0.01
	const multiplier = 2 // TODO(liyuqian): Make this configurable

	count := 0
	for key, trace := range df.TraceSet {
		count++
		if count%10000 == 0 {
			sklog.Infof("stepfit count: %d", count)
		}

		// Normalize a copy of the trace and run it through stepfit so the
		// cluster has a populated StepFit to report.
		t := vec32.Dup(trace)
		vec32.Norm(t, stddevThreshold)
		sf := stepfit.GetStepFitAtMid(t, interesting)

		tail := FilterTail(trace, quantile, multiplier, slack)
		sf.TurningPoint = len(trace) - 1

		// The sign of the filtered tail decides which cluster, if any, the
		// trace belongs to; a zero tail means the trace is not interesting.
		switch {
		case tail < 0:
			if low.StepFit.Status == "" {
				// First low trace seen becomes the cluster's representative.
				low.StepFit = sf
				low.StepFit.Status = stepfit.LOW // for TAIL_ALGO
				low.StepPoint = df.Header[sf.TurningPoint]
				low.Centroid = vec32.Dup(trace)
			}
			low.Num++
			if low.Num < config.MAX_SAMPLE_TRACES_PER_CLUSTER {
				low.Keys = append(low.Keys, key)
			}
		case tail > 0:
			if high.StepFit.Status == "" {
				// First high trace seen becomes the cluster's representative.
				high.StepFit = sf
				high.StepFit.Status = stepfit.HIGH // for TAIL_ALGO
				high.StepPoint = df.Header[sf.TurningPoint]
				high.Centroid = vec32.Dup(trace)
			}
			high.Num++
			if high.Num < config.MAX_SAMPLE_TRACES_PER_CLUSTER {
				high.Keys = append(high.Keys, key)
			}
		}
	}
	sklog.Infof("Found LOW: %d HIGH: %d", low.Num, high.Num)

	ret := &clustering2.ClusterSummaries{
		Clusters:        []*clustering2.ClusterSummary{},
		K:               k,
		StdDevThreshold: stddevThreshold,
	}
	if low.Num > 0 {
		low.ParamSummaries = clustering2.GetParamSummariesForKeys(low.Keys)
		ret.Clusters = append(ret.Clusters, low)
	}
	if high.Num > 0 {
		high.ParamSummaries = clustering2.GetParamSummariesForKeys(high.Keys)
		ret.Clusters = append(ret.Clusters, high)
	}
	return ret, nil
}