Repeat Chromium Tryserver page_set runs to reduce variance.
* Find the median of runs with the same page_name in csv_merger.py
* vm_run_telemetry.sh will now read an environment variable to determine how many times each page_set run should be repeated.
BUG=358628, skia:2396
Review URL: https://codereview.chromium.org/231433003
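The core of the change is a median-of-runs reduction: each page_set is run
several times and the merger collapses the repeated per-page rows into a
single median row. A minimal standalone sketch of that reduction (the page
name and values below are hypothetical, standing in for Telemetry's CSV
output):

    # Sketch: collapse repeated runs of one page into a single median row.
    def median(values):
        values = sorted(values)
        mid = len(values) // 2
        if len(values) % 2 == 0:
            # An even number of runs: average the two middle values.
            return (values[mid - 1] + values[mid]) / 2.0
        return values[mid]

    # Three hypothetical repeated runs of the same page.
    runs = [
        {'page_name': 'http://www.example.com/', 'rasterize_time (ms)': 2.359},
        {'page_name': 'http://www.example.com/', 'rasterize_time (ms)': 2.385},
        {'page_name': 'http://www.example.com/', 'rasterize_time (ms)': 2.372},
    ]
    merged = {
        'page_name': runs[0]['page_name'],
        'rasterize_time (ms)': median(
            [r['rasterize_time (ms)'] for r in runs]),
    }
    print(merged)  # the median rasterize_time of the three runs is 2.372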
diff --git a/cluster_telemetry/csv_merger.py b/cluster_telemetry/csv_merger.py
index ee84bcf..8afe293 100644
--- a/cluster_telemetry/csv_merger.py
+++ b/cluster_telemetry/csv_merger.py
@@ -3,7 +3,12 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
-"""Python utility to merge many CSV files into a single file."""
+"""Python utility to merge many CSV files into a single file.
+
+If multiple rows across the input CSV files share the same
+TELEMETRY_PAGE_NAME_KEY then a single row containing the median of their
+values is stored in the resultant CSV file.
+"""
+
import csv
import glob
@@ -12,6 +17,9 @@
import sys
+TELEMETRY_PAGE_NAME_KEY = 'page_name'
+
+
class CsvMerger(object):
"""Class that merges many CSV files into a single file."""
@@ -29,24 +37,92 @@
field_names.update(csv.DictReader(open(csv_file, 'r')).fieldnames)
return field_names
+  def _GetMedian(self, l):
+    """Returns the median value from the specified list.
+
+    Note: sorts the list in place.
+    """
+    l.sort()
+    length = len(l)
+    if not length % 2:
+      # An even number of values: return the average of the two middle values.
+      return (l[(length/2) - 1] + l[length/2]) / 2
+    else:
+      return l[length/2]
+
+ def _GetRowWithMedianValues(self, rows):
+ """Parses the specified rows and returns a single row with median values."""
+ fieldname_to_values = {}
+ for row in rows:
+ for fieldname in row:
+ if fieldname == TELEMETRY_PAGE_NAME_KEY:
+ fieldname_to_values[fieldname] = row[fieldname]
+ continue
+ try:
+ value = float(row[fieldname])
+ except ValueError:
+          # Only floats are expected here; a median cannot be taken over
+          # strings, so skip this field.
+ continue
+ if fieldname in fieldname_to_values:
+ fieldname_to_values[fieldname].append(value)
+ else:
+ fieldname_to_values[fieldname] = [value]
+
+ median_row = {}
+ for fieldname, values in fieldname_to_values.items():
+ if fieldname == TELEMETRY_PAGE_NAME_KEY:
+ median_row[fieldname] = values
+ continue
+ median_row[fieldname] = self._GetMedian(values)
+
+ print
+ print 'For rows: %s' % rows
+ print 'Median row is %s' % median_row
+ print
+ return median_row
+
def Merge(self):
"""Method that does the CSV merging."""
field_names = self._GetFieldNames()
print 'Merging %d csv files into %d columns' % (len(self._input_csv_files),
len(field_names))
- dict_writer = csv.DictWriter(open(self._output_csv_name, 'w'), field_names)
- dict_writer.writeheader()
+    # List of all rows to write to the output CSV. Rows that share the same
+    # TELEMETRY_PAGE_NAME_KEY are first collapsed into a single row of median
+    # values before being added here.
+    csv_rows = []
- total_rows = 0
+    # Dictionary mapping each encountered page name to the list of rows that
+    # contain it. Each list is later collapsed into a single row of median
+    # values.
+    page_names_to_rows = {}
for csv_file in self._input_csv_files:
- print 'Merging %s' % csv_file
-
dict_reader = csv.DictReader(open(csv_file, 'r'))
for row in dict_reader:
- dict_writer.writerow(row)
- total_rows += 1
+ if TELEMETRY_PAGE_NAME_KEY in row:
+          # Group rows that have TELEMETRY_PAGE_NAME_KEY by page name for
+          # the median processing below.
+ if row[TELEMETRY_PAGE_NAME_KEY] in page_names_to_rows:
+ page_names_to_rows[row[TELEMETRY_PAGE_NAME_KEY]].append(row)
+ else:
+ page_names_to_rows[row[TELEMETRY_PAGE_NAME_KEY]] = [row]
+ else:
+          # Rows without TELEMETRY_PAGE_NAME_KEY go straight to the final
+          # list of rows; they require no further processing.
+ csv_rows.append(row)
+
+    for page_name, rows in page_names_to_rows.items():
+      median_row = self._GetRowWithMedianValues(rows)
+      # Add a single row that contains median values from all rows with the
+      # same TELEMETRY_PAGE_NAME_KEY.
+      csv_rows.append(median_row)
+
+ # Write all rows in csv_rows to the specified output CSV.
+ dict_writer = csv.DictWriter(open(self._output_csv_name, 'w'), field_names)
+ dict_writer.writeheader()
+ total_rows = 0
+ for row in csv_rows:
+ dict_writer.writerow(row)
+ total_rows += 1
print 'Successfully merged %d rows' % total_rows
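Note how _GetRowWithMedianValues treats blank cells: float('') raises
ValueError, so empty cells (as in page1-4.csv below) are excluded from the
median rather than counted as zero. A tiny demonstration of that behavior:

    # Blank cells are skipped, mirroring the ValueError handling above.
    values = []
    for cell in ['2.359', '2.385', '2.372', '']:  # fourth run reported nothing
        try:
            values.append(float(cell))
        except ValueError:
            continue  # blank or non-numeric cells are not counted as 0
    print(values)  # [2.359, 2.385, 2.372] -> the median is 2.372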
diff --git a/cluster_telemetry/telemetry_slave_scripts/vm_run_telemetry.sh b/cluster_telemetry/telemetry_slave_scripts/vm_run_telemetry.sh
index 6753315..5419fb1 100644
--- a/cluster_telemetry/telemetry_slave_scripts/vm_run_telemetry.sh
+++ b/cluster_telemetry/telemetry_slave_scripts/vm_run_telemetry.sh
@@ -61,12 +61,17 @@
gsutil cp $WHITELIST_GS_LOCATION /tmp/$WHITELIST_FILE
fi
+# The number of times to repeat telemetry page_set runs.
+REPEAT_TELEMETRY_RUNS=${REPEAT_TELEMETRY_RUNS:-3}
+
if [ "$TELEMETRY_BENCHMARK" == "skpicture_printer" ]; then
# Clean and create the skp output directory.
sudo chown -R chrome-bot:chrome-bot /b/storage/skps/$PAGESETS_TYPE/$CHROMIUM_BUILD_DIR
rm -rf /b/storage/skps/$PAGESETS_TYPE/$CHROMIUM_BUILD_DIR
mkdir -p /b/storage/skps/$PAGESETS_TYPE/$CHROMIUM_BUILD_DIR/
EXTRA_ARGS="--skp-outdir=/b/storage/skps/$PAGESETS_TYPE/$CHROMIUM_BUILD_DIR/ $EXTRA_ARGS"
+ # Only do one run for SKPs.
+ REPEAT_TELEMETRY_RUNS=1
fi
if [ "$TELEMETRY_BENCHMARK" == "smoothness" ]; then
@@ -78,6 +83,12 @@
OUTPUT_DIR=/b/storage/telemetry_outputs/$RUN_ID
mkdir -p $OUTPUT_DIR
+# Change all local page_sets to use 0 wait seconds.
+find /b/storage/page_sets/$PAGESETS_TYPE/ -type f -exec sed -i "s/\"seconds\": 5/\"seconds\": 0/g" {} \;
+
+# Start the timer.
+TIMER="$(date +%s)"
+
for page_set in /b/storage/page_sets/$PAGESETS_TYPE/*.json; do
if [[ -f $page_set ]]; then
if [[ ! -z "$WHITELIST_GS_LOCATION" ]]; then
@@ -97,8 +108,14 @@
OUTPUT_DIR_ARG="-o $OUTPUT_DIR/${RUN_ID}.${page_set_basename}"
fi
echo "=== Running: eval sudo DISPLAY=:0 timeout 300 src/tools/perf/run_measurement --extra-browser-args=\"--disable-setuid-sandbox --enable-software-compositing $EXTRA_BROWSER_ARGS\" --browser-executable=/b/storage/chromium-builds/${CHROMIUM_BUILD_DIR}/chrome --browser=exact $TELEMETRY_BENCHMARK $page_set $EXTRA_ARGS $OUTPUT_DIR_ARG ==="
- eval sudo DISPLAY=:0 timeout 300 src/tools/perf/run_measurement --extra-browser-args=\"--disable-setuid-sandbox --enable-software-compositing\" --browser-executable=/b/storage/chromium-builds/${CHROMIUM_BUILD_DIR}/chrome --browser=exact $TELEMETRY_BENCHMARK $page_set $EXTRA_ARGS $OUTPUT_DIR_ARG
- sudo chown chrome-bot:chrome-bot $OUTPUT_DIR/${RUN_ID}.${page_set_basename}
+
+  for current_run in $(seq 1 $REPEAT_TELEMETRY_RUNS); do
+    echo "This is run number $current_run of $REPEAT_TELEMETRY_RUNS"
+    eval sudo DISPLAY=:0 timeout 300 src/tools/perf/run_measurement --extra-browser-args=\"--disable-setuid-sandbox --enable-software-compositing\" --browser-executable=/b/storage/chromium-builds/${CHROMIUM_BUILD_DIR}/chrome --browser=exact $TELEMETRY_BENCHMARK $page_set $EXTRA_ARGS ${OUTPUT_DIR_ARG}.${current_run}
+    # Capture the timeout exit status before chown and the loop clobber $?.
+    RUN_MEASUREMENT_STATUS=$?
+    sudo chown chrome-bot:chrome-bot $OUTPUT_DIR/${RUN_ID}.${page_set_basename}.${current_run}
+  done
+
-  if [ $? -eq 124 ]; then
+  if [ $RUN_MEASUREMENT_STATUS -eq 124 ]; then
echo "========== $page_set timed out! =========="
else
@@ -107,6 +124,9 @@
fi
done
+TELEMETRY_TIME="$(($(date +%s)-TIMER))"
+echo "Going through all page_sets took $TELEMETRY_TIME seconds"
+
# Consolidate outputs from all page sets into a single file with special
# handling for CSV files.
mkdir $OUTPUT_DIR/${RUN_ID}
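Since REPEAT_TELEMETRY_RUNS defaults to 3 and is read from the environment, a
caller can override the repeat count per invocation, e.g.
REPEAT_TELEMETRY_RUNS=5 bash vm_run_telemetry.sh ... (the script's positional
arguments are elided here since they are not part of this diff). The
skpicture_printer branch above then forces the count back to 1, since SKP
capture needs only a single run.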
diff --git a/cluster_telemetry/test_data/csv_merger/expected_output b/cluster_telemetry/test_data/csv_merger/expected_output
index 0552e0d..1d74af0 100644
--- a/cluster_telemetry/test_data/csv_merger/expected_output
+++ b/cluster_telemetry/test_data/csv_merger/expected_output
@@ -1,16 +1,19 @@
-a,c,b,e,d,"e,heading","a,heading",y,x,z
-,,,,,,,y4.1,x4.1,z4.1
-,,,,,,,y4.2,x4.2,z4.2
-,,,,,,,y4.3,x4.3,z4.3
-a5.1,,b5.1,e5.1,,,,,,z5.1
-a5.2,,b5.2,e5.2,,,,,,z5.2
-a5.3,,b5.3,e5.3,,,,,,z5.3
-a5.4,,b5.4,e5.4,,,,,,z5.4
-a1.1,c1.1,b1.1,,,,,,,
-a1.2,c1.2,b1.2,,,,,,,
-a2,c2,,e2,d2,,,,,
-,,b5.1,,,a5.1,e5.1,,,z5.1
-,,b5.2,,,a5.2,e5.2,,,z5.2
-,,b5.3,,,a5.3,e5.3,,,z5.3
-,,b5.4,,,a5.4,e5.4,,,z5.4
-,,,,d5,,,y5,x5,
+a,c,b,e,d,pixels_rasterized (pixels),pixels_recorded (pixels),record_time (ms),"e,heading",page_name,"a,heading",y,x,z,rasterize_time (ms)
+a5.1,,b5.1,e5.1,,,,,,,,,,z5.1,
+a5.2,,b5.2,e5.2,,,,,,,,,,z5.2,
+a5.3,,b5.3,e5.3,,,,,,,,,,z5.3,
+a5.4,,b5.4,e5.4,,,,,,,,,,z5.4,
+,,b5.1,,,,,,a5.1,,e5.1,,,z5.1,
+,,b5.2,,,,,,a5.2,,e5.2,,,z5.2,
+,,b5.3,,,,,,a5.3,,e5.3,,,z5.3,
+,,b5.4,,,,,,a5.4,,e5.4,,,z5.4,
+a2,c2,,e2,d2,,,,,,,,,,
+,,,,d5,,,,,,,y5,x5,,
+,,,,,,,,,,,y4.1,x4.1,z4.1,
+,,,,,,,,,,,y4.2,x4.2,z4.2,
+,,,,,,,,,,,y4.3,x4.3,z4.3,
+a1.1,c1.1,b1.1,,,,,,,,,,,,
+a1.2,c1.2,b1.2,,,,,,,,,,,,
+,,,,,2.5,1.5,,,http://www.google.com,,,,,1.0
+,,,,,1.0,,,,http://www.gmail.com,,,,,1.0
+,,,,,1310720.0,1172655.0,0.741,,http://www.facebook.com/,,,,,2.372
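The last three expected rows are the median rows computed from the new
page*.csv fixtures below. For http://www.facebook.com/, the three reported
rasterize_time values 2.359, 2.385 and 2.372 sort to a median of 2.372, and
the record_time values 0.743, 0.738 and 0.741 give 0.741; the blank cells in
page1-4.csv are skipped. For http://www.google.com, the four pixels_rasterized
values 1 through 4 average the two middle values to (2 + 3) / 2 = 2.5, and the
two pixels_recorded values 1 and 2 give 1.5.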
diff --git a/cluster_telemetry/test_data/csv_merger/page1-1.csv b/cluster_telemetry/test_data/csv_merger/page1-1.csv
new file mode 100644
index 0000000..9221f31
--- /dev/null
+++ b/cluster_telemetry/test_data/csv_merger/page1-1.csv
@@ -0,0 +1,2 @@
+pixels_recorded (pixels),page_name,rasterize_time (ms),record_time (ms),pixels_rasterized (pixels)
+1172655,http://www.facebook.com/,2.359,0.743,1310720
diff --git a/cluster_telemetry/test_data/csv_merger/page1-2.csv b/cluster_telemetry/test_data/csv_merger/page1-2.csv
new file mode 100644
index 0000000..6057f64
--- /dev/null
+++ b/cluster_telemetry/test_data/csv_merger/page1-2.csv
@@ -0,0 +1,2 @@
+pixels_recorded (pixels),page_name,rasterize_time (ms),record_time (ms),pixels_rasterized (pixels)
+1172655,http://www.facebook.com/,2.385,0.738,1310720
diff --git a/cluster_telemetry/test_data/csv_merger/page1-3.csv b/cluster_telemetry/test_data/csv_merger/page1-3.csv
new file mode 100644
index 0000000..6df1ea6
--- /dev/null
+++ b/cluster_telemetry/test_data/csv_merger/page1-3.csv
@@ -0,0 +1,2 @@
+pixels_recorded (pixels),page_name,rasterize_time (ms),record_time (ms),pixels_rasterized (pixels)
+1172655,http://www.facebook.com/,2.372,0.741,1310720
diff --git a/cluster_telemetry/test_data/csv_merger/page1-4.csv b/cluster_telemetry/test_data/csv_merger/page1-4.csv
new file mode 100644
index 0000000..49993b2
--- /dev/null
+++ b/cluster_telemetry/test_data/csv_merger/page1-4.csv
@@ -0,0 +1,2 @@
+pixels_recorded (pixels),page_name,rasterize_time (ms),record_time (ms),pixels_rasterized (pixels)
+,http://www.facebook.com/,,,1310720
diff --git a/cluster_telemetry/test_data/csv_merger/page2-1.csv b/cluster_telemetry/test_data/csv_merger/page2-1.csv
new file mode 100644
index 0000000..192401f
--- /dev/null
+++ b/cluster_telemetry/test_data/csv_merger/page2-1.csv
@@ -0,0 +1,2 @@
+pixels_rasterized (pixels),pixels_recorded (pixels),page_name,rasterize_time (ms)
+1,,http://www.google.com,1
diff --git a/cluster_telemetry/test_data/csv_merger/page2-2.csv b/cluster_telemetry/test_data/csv_merger/page2-2.csv
new file mode 100644
index 0000000..68f2815
--- /dev/null
+++ b/cluster_telemetry/test_data/csv_merger/page2-2.csv
@@ -0,0 +1,2 @@
+pixels_rasterized (pixels),pixels_recorded (pixels),page_name
+2,,http://www.google.com
diff --git a/cluster_telemetry/test_data/csv_merger/page2-3.csv b/cluster_telemetry/test_data/csv_merger/page2-3.csv
new file mode 100644
index 0000000..2a87573
--- /dev/null
+++ b/cluster_telemetry/test_data/csv_merger/page2-3.csv
@@ -0,0 +1,2 @@
+pixels_rasterized (pixels),pixels_recorded (pixels),page_name
+3,1,http://www.google.com
diff --git a/cluster_telemetry/test_data/csv_merger/page2-4.csv b/cluster_telemetry/test_data/csv_merger/page2-4.csv
new file mode 100644
index 0000000..a71ad75
--- /dev/null
+++ b/cluster_telemetry/test_data/csv_merger/page2-4.csv
@@ -0,0 +1,2 @@
+pixels_rasterized (pixels),pixels_recorded (pixels),page_name,y
+4,2,http://www.google.com,
diff --git a/cluster_telemetry/test_data/csv_merger/page3-1.csv b/cluster_telemetry/test_data/csv_merger/page3-1.csv
new file mode 100644
index 0000000..c5210da
--- /dev/null
+++ b/cluster_telemetry/test_data/csv_merger/page3-1.csv
@@ -0,0 +1,2 @@
+pixels_rasterized (pixels),pixels_recorded (pixels),page_name,rasterize_time (ms)
+1,,http://www.gmail.com,1