| -- This is a useful file for playing around with SQL queries against a database |
| -- populated with Perf data. You can use this file by running: |
| -- |
| -- cockroach sql --insecure --host=localhost < ./migrations/cdb.sql |
| -- |
| -- Make sure to apply the migrations the first time: |
| -- |
| -- cockroach sql --insecure --host=localhost < ./migrations/cockroachdb/0001_create_initial_tables.up.sql |
| -- |
| -- You should be able to run this file against the same database more than once |
| -- w/o error. |
| -- |
| -- For reference, the following trace names are used in this file |
| -- and these are their md5 hashes: |
| -- ,arch=x86,config=8888, => fe385b159ff55dca481069805e5ff050 |
| -- ,arch=x86,config=565, => 277262a9236d571883d47dab102070bc |
| -- ,arch=risc-v,config=565, => 2f9eedb889a1af4e0cf5a76d29cf12b3 |
| INSERT INTO SourceFiles( |
| source_file, |
| source_file_id) |
| VALUES ( |
| 'gs://perf-bucket/2020/02/08/11/testdata.json', |
| 1), |
| ( |
| 'gs://perf-bucket/2020/02/08/12/testdata.json', |
| 2), |
| ( |
| 'gs://perf-bucket/2020/02/08/13/testdata.json', |
| 3), |
| ( |
| 'gs://perf-bucket/2020/02/08/14/testdata.json', |
| 4) |
| ON CONFLICT |
| DO NOTHING; |
| |
| INSERT INTO TraceValues( |
| trace_id, |
| commit_number, |
| val, |
| source_file_id) |
| VALUES ( |
| '\xfe385b159ff55dca481069805e5ff050', |
| 1, |
| 1.2, |
| 1), |
| ( |
| '\xfe385b159ff55dca481069805e5ff050', |
| 2, |
| 1.3, |
| 2), |
| ( |
| '\xfe385b159ff55dca481069805e5ff050', |
| 3, |
| 1.4, |
| 3), |
| ( |
| '\x2f9eedb889a1af4e0cf5a76d29cf12b3', |
| 256, |
| 1.1, |
| 4), |
| ( |
| '\x277262a9236d571883d47dab102070bc', |
| 1, |
| 2.2, |
| 1), |
| ( |
| '\x277262a9236d571883d47dab102070bc', |
| 2, |
| 2.3, |
| 2), |
| ( |
| '\x277262a9236d571883d47dab102070bc', |
| 3, |
| 2.4, |
| 3), |
| ( |
| '\x2f9eedb889a1af4e0cf5a76d29cf12b3', |
| 256, |
| 2.1, |
| 4) |
| ON CONFLICT |
| DO NOTHING; |
| |
| INSERT INTO ParamSets( |
| tile_number, |
| param_key, |
| param_value) |
| VALUES ( |
| 0, |
| 'config', |
| '8888'), |
| ( |
| 0, |
| 'config', |
| '565'), |
| ( |
| 0, |
| 'arch', |
| 'x86'), |
| ( |
| 1, |
| 'config', |
| '565'), |
| ( |
| 1, |
| 'arch', |
| 'risc-v') |
| ON CONFLICT |
| DO NOTHING; |
| |
| INSERT INTO Postings( |
| tile_number, |
| key_value, |
| trace_id) |
| VALUES ( |
| 0, |
| 'arch=x86', |
| '\xfe385b159ff55dca481069805e5ff050'), |
| ( |
| 0, |
| 'arch=x86', |
| '\x277262a9236d571883d47dab102070bc'), |
| ( |
| 0, |
| 'config=8888', |
| '\xfe385b159ff55dca481069805e5ff050'), |
| ( |
| 0, |
| 'config=565', |
| '\x277262a9236d571883d47dab102070bc'), |
| ( |
| 1, |
| 'config=565', |
| '\x2f9eedb889a1af4e0cf5a76d29cf12b3'), |
| ( |
| 1, |
| 'arch=risc-v', |
| '\x2f9eedb889a1af4e0cf5a76d29cf12b3') |
| ON CONFLICT |
| DO NOTHING; |
| |
| -- All trace_ids that match a particular key=value. |
| SELECT |
| encode(trace_id, 'hex') |
| FROM |
| Postings |
| WHERE |
| key_value IN ('config=8888', 'config=565') |
| AND tile_number = 0 |
| ORDER BY |
| trace_id ASC; |
| |
| -- Retrieve matching values. |
| SELECT |
| encode(trace_id, 'hex'), |
| commit_number, |
| val |
| FROM |
| TraceValues |
| WHERE |
| commit_number >= 0 |
| AND commit_number < 255 |
| AND trace_id IN ('\xfe385b159ff55dca481069805e5ff050', '\x277262a9236d571883d47dab102070bc'); |
| |
| -- Compound queries. |
| SELECT |
| encode(trace_id, 'hex') |
| FROM |
| Postings |
| WHERE |
| key_value IN ('config=8888', 'config=565') |
| AND tile_number = 0 |
| INTERSECT |
| SELECT |
| encode(trace_id, 'hex') |
| FROM |
| Postings |
| WHERE |
| key_value IN ('arch=x86') |
| AND tile_number = 0; |
| |
| -- ParamSet for a tile. |
| SELECT |
| param_key, |
| param_value |
| FROM |
| ParamSets |
| WHERE |
| tile_number = 1; |
| |
| -- Most recent tile. |
| SELECT |
| tile_number |
| FROM |
| ParamSets @by_tile_number |
| ORDER BY |
| tile_number DESC |
| LIMIT 1; |
| |
| -- GetSource by trace_id. |
| SELECT |
| SourceFiles.source_file |
| FROM |
| TraceValues |
| INNER LOOKUP |
| JOIN SourceFiles ON TraceValues.source_file_id = SourceFiles.source_file_id |
| WHERE |
| TraceValues.trace_id = '\xfe385b159ff55dca481069805e5ff050' |
| AND TraceValues.commit_number = 1; |
| |
| -- Count the number of matches to a query. |
| SELECT |
| count(*) |
| FROM ( |
| SELECT |
| trace_id |
| FROM |
| Postings |
| WHERE |
| key_value IN ('config=8888', 'config=565') |
| AND tile_number = 0 |
| INTERSECT |
| SELECT |
| trace_id |
| FROM |
| Postings |
| WHERE |
| key_value IN ('arch=x86') |
| AND tile_number = 0); |
| |
| -- The first part of every query is to pull in the trace names |
| -- and trace_ids that match the given query for the given tile. |
| SELECT |
| key_value, |
| trace_id |
| FROM |
| Postings @by_trace_id |
| WHERE |
| tile_number = 0 |
| AND trace_id IN ( |
| SELECT |
| trace_id |
| FROM |
| Postings |
| WHERE |
| key_value IN ('config=8888', 'config=565') |
| AND tile_number = 0 |
| INTERSECT |
| SELECT |
| trace_id |
| FROM |
| Postings |
| WHERE |
| key_value IN ('arch=x86') |
| AND tile_number = 0); |
| |
| -- Then query for trace values in batches. |
| SELECT |
| trace_id, |
| commit_number, |
| val |
| FROM |
| TraceValues |
| WHERE |
| tracevalues.commit_number >= 0 |
| AND tracevalues.commit_number < 256 |
| AND tracevalues.trace_id IN ('\xfe385b159ff55dca481069805e5ff050', '\x277262a9236d571883d47dab102070bc'); |
| |
| -- GetLastNSources |
| SELECT |
| SourceFiles.source_file |
| FROM |
| TraceValues @primary |
| INNER LOOKUP |
| JOIN SourceFiles @primary ON TraceValues.source_file_id = SourceFiles.source_file_id |
| WHERE |
| TraceValues.trace_id = '\xfe385b159ff55dca481069805e5ff050' |
| ORDER BY |
| TraceValues.commit_number DESC |
| LIMIT 5; |
| |
| -- GetTraceIDsBySource |
| -- Note that this requires the tile_number, which we won't have, |
| -- unless GetLastNSources also returns the commit_number. |
| SELECT |
| Postings.key_value, |
| Postings.trace_id |
| FROM |
| SourceFiles @by_source_file |
| INNER LOOKUP |
| JOIN TraceValues @by_source_file_id ON TraceValues.source_file_id = SourceFiles.source_file_id |
| INNER LOOKUP |
| JOIN Postings @by_trace_id ON TraceValues.trace_id = Postings.trace_id |
| WHERE |
| SourceFiles.source_file = 'gs://perf-bucket/2020/02/08/11/testdata.json' |
| AND Postings.tile_number = 0 |
| ORDER BY |
| Postings.trace_id; |
| |
| -- Faster queries. Doing the merge will be faster with a smaller number of |
| -- traces returned. They idea is to first do count(*) queries for each |
| -- key=[values] part of a query, and use the smallest response to narrow down |
| -- the number of trace ids to a small number. |
| -- |
| -- Then issue the queries for all the rest of the key=[values], but now also |
| -- include a restriction for the trace_ids. |
| SELECT |
| count(*) |
| FROM |
| Postings |
| WHERE |
| tile_number = 0 |
| AND key_value = 'arch=x86'; |
| |
| -- Now that we've determined arch=x86 to return the smallest number of traces we |
| -- retrieve all those trace_id's. |
| SELECT |
| encode(trace_id, 'hex') |
| FROM |
| Postings |
| WHERE |
| tile_number = 0 |
| AND key_value = 'arch=x86'; |
| |
| -- Now use those trace_id's to constrain the other queries. |
| -- |
| -- In practice don't bother with the IN clause if the number of trace_id's |
| -- returned from the count(*) are too large, maybe > 1,000 as a cutoff? |
| SELECT |
| encode(trace_id, 'hex') |
| FROM |
| Postings |
| WHERE |
| tile_number = 0 |
| AND key_value = 'config=565' |
| AND trace_id IN ('\xfe385b159ff55dca481069805e5ff050', '\x277262a9236d571883d47dab102070bc'); |
| |
| -- The COUNT(*) is only useful if it's less than or equal to |
| -- countOptimizationThreshold, so limit the query to that many results. You |
| -- can't directly apply the LIMIT to the SELECT with the count(*) clause because |
| -- that always returns just one row. |
| SELECT |
| count(*) |
| FROM ( |
| SELECT |
| * |
| FROM |
| Postings |
| WHERE |
| tile_number = 0 |
| AND key_value = 'arch=x86' |
| LIMIT 1); |
| |