blob: 99ac8fdbc6af60288bec3c5a5d6072f9b035a066 [file] [log] [blame]
-- This is a useful file for playing around with SQL queries against a database
-- populated with Perf data. You can use this file by running:
--
-- cockroach sql --insecure --host=localhost < ./migrations/cdb.sql
--
-- Make sure to apply the migrations the first time:
--
-- cockroach sql --insecure --host=localhost < ./migrations/cockroachdb/0001_create_initial_tables.up.sql
--
-- You should be able to run this file against the same database more than once
-- w/o error.
--
-- For reference, the following trace names are used in this file
-- and these are their md5 hashes:
-- ,arch=x86,config=8888, => fe385b159ff55dca481069805e5ff050
-- ,arch=x86,config=565, => 277262a9236d571883d47dab102070bc
-- ,arch=risc-v,config=565, => 2f9eedb889a1af4e0cf5a76d29cf12b3
INSERT INTO SourceFiles(
source_file,
source_file_id)
VALUES (
'gs://perf-bucket/2020/02/08/11/testdata.json',
1),
(
'gs://perf-bucket/2020/02/08/12/testdata.json',
2),
(
'gs://perf-bucket/2020/02/08/13/testdata.json',
3),
(
'gs://perf-bucket/2020/02/08/14/testdata.json',
4)
ON CONFLICT
DO NOTHING;
INSERT INTO TraceValues(
trace_id,
commit_number,
val,
source_file_id)
VALUES (
'\xfe385b159ff55dca481069805e5ff050',
1,
1.2,
1),
(
'\xfe385b159ff55dca481069805e5ff050',
2,
1.3,
2),
(
'\xfe385b159ff55dca481069805e5ff050',
3,
1.4,
3),
(
'\x2f9eedb889a1af4e0cf5a76d29cf12b3',
256,
1.1,
4),
(
'\x277262a9236d571883d47dab102070bc',
1,
2.2,
1),
(
'\x277262a9236d571883d47dab102070bc',
2,
2.3,
2),
(
'\x277262a9236d571883d47dab102070bc',
3,
2.4,
3),
(
'\x2f9eedb889a1af4e0cf5a76d29cf12b3',
256,
2.1,
4)
ON CONFLICT
DO NOTHING;
INSERT INTO ParamSets(
tile_number,
param_key,
param_value)
VALUES (
0,
'config',
'8888'),
(
0,
'config',
'565'),
(
0,
'arch',
'x86'),
(
1,
'config',
'565'),
(
1,
'arch',
'risc-v')
ON CONFLICT
DO NOTHING;
INSERT INTO Postings(
tile_number,
key_value,
trace_id)
VALUES (
0,
'arch=x86',
'\xfe385b159ff55dca481069805e5ff050'),
(
0,
'arch=x86',
'\x277262a9236d571883d47dab102070bc'),
(
0,
'config=8888',
'\xfe385b159ff55dca481069805e5ff050'),
(
0,
'config=565',
'\x277262a9236d571883d47dab102070bc'),
(
1,
'config=565',
'\x2f9eedb889a1af4e0cf5a76d29cf12b3'),
(
1,
'arch=risc-v',
'\x2f9eedb889a1af4e0cf5a76d29cf12b3')
ON CONFLICT
DO NOTHING;
-- All trace_ids that match a particular key=value.
SELECT
encode(trace_id, 'hex')
FROM
Postings
WHERE
key_value IN ('config=8888', 'config=565')
AND tile_number = 0
ORDER BY
trace_id ASC;
-- Retrieve matching values.
SELECT
encode(trace_id, 'hex'),
commit_number,
val
FROM
TraceValues
WHERE
commit_number >= 0
AND commit_number < 255
AND trace_id IN ('\xfe385b159ff55dca481069805e5ff050', '\x277262a9236d571883d47dab102070bc');
-- Compound queries.
SELECT
encode(trace_id, 'hex')
FROM
Postings
WHERE
key_value IN ('config=8888', 'config=565')
AND tile_number = 0
INTERSECT
SELECT
encode(trace_id, 'hex')
FROM
Postings
WHERE
key_value IN ('arch=x86')
AND tile_number = 0;
-- ParamSet for a tile.
SELECT
param_key,
param_value
FROM
ParamSets
WHERE
tile_number = 1;
-- Most recent tile.
SELECT
tile_number
FROM
ParamSets @by_tile_number
ORDER BY
tile_number DESC
LIMIT 1;
-- GetSource by trace_id.
SELECT
SourceFiles.source_file
FROM
TraceValues
INNER LOOKUP
JOIN SourceFiles ON TraceValues.source_file_id = SourceFiles.source_file_id
WHERE
TraceValues.trace_id = '\xfe385b159ff55dca481069805e5ff050'
AND TraceValues.commit_number = 1;
-- Count the number of matches to a query.
SELECT
count(*)
FROM (
SELECT
trace_id
FROM
Postings
WHERE
key_value IN ('config=8888', 'config=565')
AND tile_number = 0
INTERSECT
SELECT
trace_id
FROM
Postings
WHERE
key_value IN ('arch=x86')
AND tile_number = 0);
-- The first part of every query is to pull in the trace names
-- and trace_ids that match the given query for the given tile.
SELECT
key_value,
trace_id
FROM
Postings @by_trace_id
WHERE
tile_number = 0
AND trace_id IN (
SELECT
trace_id
FROM
Postings
WHERE
key_value IN ('config=8888', 'config=565')
AND tile_number = 0
INTERSECT
SELECT
trace_id
FROM
Postings
WHERE
key_value IN ('arch=x86')
AND tile_number = 0);
-- Then query for trace values in batches.
SELECT
trace_id,
commit_number,
val
FROM
TraceValues
WHERE
tracevalues.commit_number >= 0
AND tracevalues.commit_number < 256
AND tracevalues.trace_id IN ('\xfe385b159ff55dca481069805e5ff050', '\x277262a9236d571883d47dab102070bc');
-- GetLastNSources
SELECT
SourceFiles.source_file
FROM
TraceValues @primary
INNER LOOKUP
JOIN SourceFiles @primary ON TraceValues.source_file_id = SourceFiles.source_file_id
WHERE
TraceValues.trace_id = '\xfe385b159ff55dca481069805e5ff050'
ORDER BY
TraceValues.commit_number DESC
LIMIT 5;
-- GetTraceIDsBySource
-- Note that this requires the tile_number, which we won't have,
-- unless GetLastNSources also returns the commit_number.
SELECT
Postings.key_value,
Postings.trace_id
FROM
SourceFiles @by_source_file
INNER LOOKUP
JOIN TraceValues @by_source_file_id ON TraceValues.source_file_id = SourceFiles.source_file_id
INNER LOOKUP
JOIN Postings @by_trace_id ON TraceValues.trace_id = Postings.trace_id
WHERE
SourceFiles.source_file = 'gs://perf-bucket/2020/02/08/11/testdata.json'
AND Postings.tile_number = 0
ORDER BY
Postings.trace_id;
-- Faster queries. Doing the merge will be faster with a smaller number of
-- traces returned. They idea is to first do count(*) queries for each
-- key=[values] part of a query, and use the smallest response to narrow down
-- the number of trace ids to a small number.
--
-- Then issue the queries for all the rest of the key=[values], but now also
-- include a restriction for the trace_ids.
SELECT
count(*)
FROM
Postings
WHERE
tile_number = 0
AND key_value = 'arch=x86';
-- Now that we've determined arch=x86 to return the smallest number of traces we
-- retrieve all those trace_id's.
SELECT
encode(trace_id, 'hex')
FROM
Postings
WHERE
tile_number = 0
AND key_value = 'arch=x86';
-- Now use those trace_id's to constrain the other queries.
--
-- In practice don't bother with the IN clause if the number of trace_id's
-- returned from the count(*) are too large, maybe > 1,000 as a cutoff?
SELECT
encode(trace_id, 'hex')
FROM
Postings
WHERE
tile_number = 0
AND key_value = 'config=565'
AND trace_id IN ('\xfe385b159ff55dca481069805e5ff050', '\x277262a9236d571883d47dab102070bc');
-- The COUNT(*) is only useful if it's less than or equal to
-- countOptimizationThreshold, so limit the query to that many results. You
-- can't directly apply the LIMIT to the SELECT with the count(*) clause because
-- that always returns just one row.
SELECT
count(*)
FROM (
SELECT
*
FROM
Postings
WHERE
tile_number = 0
AND key_value = 'arch=x86'
LIMIT 1);