Repeat Chromium Tryserver page_set runs to reduce variance. * Find the median of runs with the same page_name in csv_merger.py * vm_run_telemetry.sh will now read an env variable to determine how many times each page_set run should be repeated. BUG=358628, skia:2396 Review URL: https://codereview.chromium.org/231433003
diff --git a/cluster_telemetry/csv_merger.py b/cluster_telemetry/csv_merger.py index ee84bcf..8afe293 100644 --- a/cluster_telemetry/csv_merger.py +++ b/cluster_telemetry/csv_merger.py
@@ -3,7 +3,12 @@ # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. -"""Python utility to merge many CSV files into a single file.""" +"""Python utility to merge many CSV files into a single file. + +If there are multiple CSV files with the same TELEMETRY_PAGE_NAME_KEY then the +median of all values is stored in the resultant CSV file. +""" + import csv import glob @@ -12,6 +17,9 @@ import sys +TELEMETRY_PAGE_NAME_KEY = 'page_name' + + class CsvMerger(object): """Class that merges many CSV files into a single file.""" @@ -29,24 +37,92 @@ field_names.update(csv.DictReader(open(csv_file, 'r')).fieldnames) return field_names + def _GetMedian(self, l): + """Returns the median value from the specified list.""" + l.sort() + length = len(l) + if not length % 2: + return (l[(length/2) - 1] + l[length/2]) / 2 + else: + return l[length/2] + + def _GetRowWithMedianValues(self, rows): + """Parses the specified rows and returns a single row with median values.""" + fieldname_to_values = {} + for row in rows: + for fieldname in row: + if fieldname == TELEMETRY_PAGE_NAME_KEY: + fieldname_to_values[fieldname] = row[fieldname] + continue + try: + value = float(row[fieldname]) + except ValueError: + # We expected only floats, cannot compare strings. Skip this field. 
+ continue + if fieldname in fieldname_to_values: + fieldname_to_values[fieldname].append(value) + else: + fieldname_to_values[fieldname] = [value] + + median_row = {} + for fieldname, values in fieldname_to_values.items(): + if fieldname == TELEMETRY_PAGE_NAME_KEY: + median_row[fieldname] = values + continue + median_row[fieldname] = self._GetMedian(values) + + print + print 'For rows: %s' % rows + print 'Median row is %s' % median_row + print + return median_row + def Merge(self): """Method that does the CSV merging.""" field_names = self._GetFieldNames() print 'Merging %d csv files into %d columns' % (len(self._input_csv_files), len(field_names)) - dict_writer = csv.DictWriter(open(self._output_csv_name, 'w'), field_names) - dict_writer.writeheader() + # List that will contain all rows read from the CSV files. It will also + # combine all rows found with the same TELEMETRY_PAGE_NAME_KEY into one + # with median values. + csv_rows = [] - total_rows = 0 + # Dictionary containing all the encountered page names. If a page name that + # is already in the dictionary is encountered then the median of its + # values is used. + page_names_to_rows = {} for csv_file in self._input_csv_files: - print 'Merging %s' % csv_file - dict_reader = csv.DictReader(open(csv_file, 'r')) for row in dict_reader: - dict_writer.writerow(row) - total_rows += 1 + if TELEMETRY_PAGE_NAME_KEY in row: + # Add rows found with 'page_name' to a different dictionary for + # processing. + if row[TELEMETRY_PAGE_NAME_KEY] in page_names_to_rows: + page_names_to_rows[row[TELEMETRY_PAGE_NAME_KEY]].append(row) + else: + page_names_to_rows[row[TELEMETRY_PAGE_NAME_KEY]] = [row] + else: + # Add rows found without TELEMETRY_PAGE_NAME_KEY to the final list of + # rows, they require no further processing. 
+ csv_rows.append(row) + + if page_names_to_rows: + for page_name in page_names_to_rows: + rows = page_names_to_rows[page_name] + median_row = self._GetRowWithMedianValues(rows) + # Add a single row that contains median values from all rows with the + # same TELEMETRY_PAGE_NAME_KEY. + csv_rows.append(median_row) + + # Write all rows in csv_rows to the specified output CSV. + dict_writer = csv.DictWriter(open(self._output_csv_name, 'w'), field_names) + dict_writer.writeheader() + total_rows = 0 + for row in csv_rows: + dict_writer.writerow(row) + total_rows += 1 print 'Successfully merged %d rows' % total_rows
diff --git a/cluster_telemetry/telemetry_slave_scripts/vm_run_telemetry.sh b/cluster_telemetry/telemetry_slave_scripts/vm_run_telemetry.sh index 6753315..5419fb1 100644 --- a/cluster_telemetry/telemetry_slave_scripts/vm_run_telemetry.sh +++ b/cluster_telemetry/telemetry_slave_scripts/vm_run_telemetry.sh
@@ -61,12 +61,17 @@ gsutil cp $WHITELIST_GS_LOCATION /tmp/$WHITELIST_FILE fi +# The number of times to repeat telemetry page_set runs. +REPEAT_TELEMETRY_RUNS=${REPEAT_TELEMETRY_RUNS:=3} + if [ "$TELEMETRY_BENCHMARK" == "skpicture_printer" ]; then # Clean and create the skp output directory. sudo chown -R chrome-bot:chrome-bot /b/storage/skps/$PAGESETS_TYPE/$CHROMIUM_BUILD_DIR rm -rf /b/storage/skps/$PAGESETS_TYPE/$CHROMIUM_BUILD_DIR mkdir -p /b/storage/skps/$PAGESETS_TYPE/$CHROMIUM_BUILD_DIR/ EXTRA_ARGS="--skp-outdir=/b/storage/skps/$PAGESETS_TYPE/$CHROMIUM_BUILD_DIR/ $EXTRA_ARGS" + # Only do one run for SKPs. + REPEAT_TELEMETRY_RUNS=1 fi if [ "$TELEMETRY_BENCHMARK" == "smoothness" ]; then @@ -78,6 +83,12 @@ OUTPUT_DIR=/b/storage/telemetry_outputs/$RUN_ID mkdir -p $OUTPUT_DIR +# Change all local page_sets to use 0 wait seconds. +find /home/default/storage/page_sets/$PAGESETS_TYPE/ -type f -exec sed -i "s/\"seconds\": 5/\"seconds\": 0/g" {} \; + +# Start the timer. +TIMER="$(date +%s)" + for page_set in /b/storage/page_sets/$PAGESETS_TYPE/*.json; do if [[ -f $page_set ]]; then if [[ ! 
-z "$WHITELIST_GS_LOCATION" ]]; then @@ -97,8 +108,14 @@ OUTPUT_DIR_ARG="-o $OUTPUT_DIR/${RUN_ID}.${page_set_basename}" fi echo "=== Running: eval sudo DISPLAY=:0 timeout 300 src/tools/perf/run_measurement --extra-browser-args=\"--disable-setuid-sandbox --enable-software-compositing $EXTRA_BROWSER_ARGS\" --browser-executable=/b/storage/chromium-builds/${CHROMIUM_BUILD_DIR}/chrome --browser=exact $TELEMETRY_BENCHMARK $page_set $EXTRA_ARGS $OUTPUT_DIR_ARG ===" - eval sudo DISPLAY=:0 timeout 300 src/tools/perf/run_measurement --extra-browser-args=\"--disable-setuid-sandbox --enable-software-compositing\" --browser-executable=/b/storage/chromium-builds/${CHROMIUM_BUILD_DIR}/chrome --browser=exact $TELEMETRY_BENCHMARK $page_set $EXTRA_ARGS $OUTPUT_DIR_ARG - sudo chown chrome-bot:chrome-bot $OUTPUT_DIR/${RUN_ID}.${page_set_basename} + + for current_run in `seq 1 $REPEAT_TELEMETRY_RUNS`; + do + echo "This is run number $current_run" + eval sudo DISPLAY=:0 timeout 300 src/tools/perf/run_measurement --extra-browser-args=\"--disable-setuid-sandbox --enable-software-compositing\" --browser-executable=/b/storage/chromium-builds/${CHROMIUM_BUILD_DIR}/chrome --browser=exact $TELEMETRY_BENCHMARK $page_set $EXTRA_ARGS ${OUTPUT_DIR_ARG}.${current_run} + sudo chown chrome-bot:chrome-bot $OUTPUT_DIR/${RUN_ID}.${page_set_basename}.${current_run} + done + if [ $? -eq 124 ]; then echo "========== $page_set timed out! ==========" else @@ -107,6 +124,9 @@ fi done +TELEMETRY_TIME="$(($(date +%s)-TIMER))" +echo "Going through all page_sets took $TELEMETRY_TIME seconds" + # Consolidate outputs from all page sets into a single file with special # handling for CSV files. mkdir $OUTPUT_DIR/${RUN_ID}
diff --git a/cluster_telemetry/test_data/csv_merger/expected_output b/cluster_telemetry/test_data/csv_merger/expected_output index 0552e0d..1d74af0 100644 --- a/cluster_telemetry/test_data/csv_merger/expected_output +++ b/cluster_telemetry/test_data/csv_merger/expected_output
@@ -1,16 +1,19 @@ -a,c,b,e,d,"e,heading","a,heading",y,x,z -,,,,,,,y4.1,x4.1,z4.1 -,,,,,,,y4.2,x4.2,z4.2 -,,,,,,,y4.3,x4.3,z4.3 -a5.1,,b5.1,e5.1,,,,,,z5.1 -a5.2,,b5.2,e5.2,,,,,,z5.2 -a5.3,,b5.3,e5.3,,,,,,z5.3 -a5.4,,b5.4,e5.4,,,,,,z5.4 -a1.1,c1.1,b1.1,,,,,,, -a1.2,c1.2,b1.2,,,,,,, -a2,c2,,e2,d2,,,,, -,,b5.1,,,a5.1,e5.1,,,z5.1 -,,b5.2,,,a5.2,e5.2,,,z5.2 -,,b5.3,,,a5.3,e5.3,,,z5.3 -,,b5.4,,,a5.4,e5.4,,,z5.4 -,,,,d5,,,y5,x5, +a,c,b,e,d,pixels_rasterized (pixels),pixels_recorded (pixels),record_time (ms),"e,heading",page_name,"a,heading",y,x,z,rasterize_time (ms) +a5.1,,b5.1,e5.1,,,,,,,,,,z5.1, +a5.2,,b5.2,e5.2,,,,,,,,,,z5.2, +a5.3,,b5.3,e5.3,,,,,,,,,,z5.3, +a5.4,,b5.4,e5.4,,,,,,,,,,z5.4, +,,b5.1,,,,,,a5.1,,e5.1,,,z5.1, +,,b5.2,,,,,,a5.2,,e5.2,,,z5.2, +,,b5.3,,,,,,a5.3,,e5.3,,,z5.3, +,,b5.4,,,,,,a5.4,,e5.4,,,z5.4, +a2,c2,,e2,d2,,,,,,,,,, +,,,,d5,,,,,,,y5,x5,, +,,,,,,,,,,,y4.1,x4.1,z4.1, +,,,,,,,,,,,y4.2,x4.2,z4.2, +,,,,,,,,,,,y4.3,x4.3,z4.3, +a1.1,c1.1,b1.1,,,,,,,,,,,, +a1.2,c1.2,b1.2,,,,,,,,,,,, +,,,,,2.5,1.5,,,http://www.google.com,,,,,1.0 +,,,,,1.0,,,,http://www.gmail.com,,,,,1.0 +,,,,,1310720.0,1172655.0,0.741,,http://www.facebook.com/,,,,,2.372
diff --git a/cluster_telemetry/test_data/csv_merger/page1-1.csv b/cluster_telemetry/test_data/csv_merger/page1-1.csv new file mode 100644 index 0000000..9221f31 --- /dev/null +++ b/cluster_telemetry/test_data/csv_merger/page1-1.csv
@@ -0,0 +1,2 @@ +pixels_recorded (pixels),page_name,rasterize_time (ms),record_time (ms),pixels_rasterized (pixels) +1172655,http://www.facebook.com/,2.359,0.743,1310720
diff --git a/cluster_telemetry/test_data/csv_merger/page1-2.csv b/cluster_telemetry/test_data/csv_merger/page1-2.csv new file mode 100644 index 0000000..6057f64 --- /dev/null +++ b/cluster_telemetry/test_data/csv_merger/page1-2.csv
@@ -0,0 +1,2 @@ +pixels_recorded (pixels),page_name,rasterize_time (ms),record_time (ms),pixels_rasterized (pixels) +1172655,http://www.facebook.com/,2.385,0.738,1310720
diff --git a/cluster_telemetry/test_data/csv_merger/page1-3.csv b/cluster_telemetry/test_data/csv_merger/page1-3.csv new file mode 100644 index 0000000..6df1ea6 --- /dev/null +++ b/cluster_telemetry/test_data/csv_merger/page1-3.csv
@@ -0,0 +1,2 @@ +pixels_recorded (pixels),page_name,rasterize_time (ms),record_time (ms),pixels_rasterized (pixels) +1172655,http://www.facebook.com/,2.372,0.741,1310720
diff --git a/cluster_telemetry/test_data/csv_merger/page1-4.csv b/cluster_telemetry/test_data/csv_merger/page1-4.csv new file mode 100644 index 0000000..49993b2 --- /dev/null +++ b/cluster_telemetry/test_data/csv_merger/page1-4.csv
@@ -0,0 +1,2 @@ +pixels_recorded (pixels),page_name,rasterize_time (ms),record_time (ms),pixels_rasterized (pixels) +,http://www.facebook.com/,,,1310720
diff --git a/cluster_telemetry/test_data/csv_merger/page2-1.csv b/cluster_telemetry/test_data/csv_merger/page2-1.csv new file mode 100644 index 0000000..192401f --- /dev/null +++ b/cluster_telemetry/test_data/csv_merger/page2-1.csv
@@ -0,0 +1,2 @@ +pixels_rasterized (pixels),pixels_recorded (pixels),page_name,rasterize_time (ms) +1,,http://www.google.com,1
diff --git a/cluster_telemetry/test_data/csv_merger/page2-2.csv b/cluster_telemetry/test_data/csv_merger/page2-2.csv new file mode 100644 index 0000000..68f2815 --- /dev/null +++ b/cluster_telemetry/test_data/csv_merger/page2-2.csv
@@ -0,0 +1,2 @@ +pixels_rasterized (pixels),pixels_recorded (pixels),page_name +2,,http://www.google.com
diff --git a/cluster_telemetry/test_data/csv_merger/page2-3.csv b/cluster_telemetry/test_data/csv_merger/page2-3.csv new file mode 100644 index 0000000..2a87573 --- /dev/null +++ b/cluster_telemetry/test_data/csv_merger/page2-3.csv
@@ -0,0 +1,2 @@ +pixels_rasterized (pixels),pixels_recorded (pixels),page_name +3,1,http://www.google.com
diff --git a/cluster_telemetry/test_data/csv_merger/page2-4.csv b/cluster_telemetry/test_data/csv_merger/page2-4.csv new file mode 100644 index 0000000..a71ad75 --- /dev/null +++ b/cluster_telemetry/test_data/csv_merger/page2-4.csv
@@ -0,0 +1,2 @@ +pixels_rasterized (pixels),pixels_recorded (pixels),page_name,y +4,2,http://www.google.com,
diff --git a/cluster_telemetry/test_data/csv_merger/page3-1.csv b/cluster_telemetry/test_data/csv_merger/page3-1.csv new file mode 100644 index 0000000..c5210da --- /dev/null +++ b/cluster_telemetry/test_data/csv_merger/page3-1.csv
@@ -0,0 +1,2 @@ +pixels_rasterized (pixels),pixels_recorded (pixels),page_name,rasterize_time (ms) +1,,http://www.gmail.com,1