-- This is a useful file for playing around with SQL queries against a database
-- populated with Perf data. You can use this file by running:
--
--   cockroach sql --insecure --host=localhost < ./migrations/cdb.sql
--
-- Make sure to apply the migrations the first time:
--
--   cockroach sql --insecure --host=localhost < ./migrations/cockroachdb/0001_create_initial_tables.up.sql
--
-- You should be able to run this file against the same database more than once
-- w/o error.
--
-- For reference, the following trace names are used in this file
-- and these are their md5 hashes:
--   ,arch=x86,config=8888,   =>  fe385b159ff55dca481069805e5ff050
--   ,arch=x86,config=565,    =>  277262a9236d571883d47dab102070bc
--   ,arch=risc-v,config=565, =>  2f9eedb889a1af4e0cf5a76d29cf12b3
INSERT INTO SourceFiles(
    source_file,
    source_file_id)
VALUES (
    'gs://perf-bucket/2020/02/08/11/testdata.json',
    1),
(
    'gs://perf-bucket/2020/02/08/12/testdata.json',
    2),
(
    'gs://perf-bucket/2020/02/08/13/testdata.json',
    3),
(
    'gs://perf-bucket/2020/02/08/14/testdata.json',
    4)
ON CONFLICT
    DO NOTHING;

INSERT INTO TraceValues(
    trace_id,
    commit_number,
    val,
    source_file_id)
VALUES (
    '\xfe385b159ff55dca481069805e5ff050',
    1,
    1.2,
    1),
(
    '\xfe385b159ff55dca481069805e5ff050',
    2,
    1.3,
    2),
(
    '\xfe385b159ff55dca481069805e5ff050',
    3,
    1.4,
    3),
(
    '\x2f9eedb889a1af4e0cf5a76d29cf12b3',
    256,
    1.1,
    4),
(
    '\x277262a9236d571883d47dab102070bc',
    1,
    2.2,
    1),
(
    '\x277262a9236d571883d47dab102070bc',
    2,
    2.3,
    2),
(
    '\x277262a9236d571883d47dab102070bc',
    3,
    2.4,
    3),
(
    '\x2f9eedb889a1af4e0cf5a76d29cf12b3',
    256,
    2.1,
    4)
ON CONFLICT
    DO NOTHING;

INSERT INTO ParamSets(
    tile_number,
    param_key,
    param_value)
VALUES (
    0,
    'config',
    '8888'),
(
    0,
    'config',
    '565'),
(
    0,
    'arch',
    'x86'),
(
    1,
    'config',
    '565'),
(
    1,
    'arch',
    'risc-v')
ON CONFLICT
    DO NOTHING;

INSERT INTO Postings(
    tile_number,
    key_value,
    trace_id)
VALUES (
    0,
    'arch=x86',
    '\xfe385b159ff55dca481069805e5ff050'),
(
    0,
    'arch=x86',
    '\x277262a9236d571883d47dab102070bc'),
(
    0,
    'config=8888',
    '\xfe385b159ff55dca481069805e5ff050'),
(
    0,
    'config=565',
    '\x277262a9236d571883d47dab102070bc'),
(
    1,
    'config=565',
    '\x2f9eedb889a1af4e0cf5a76d29cf12b3'),
(
    1,
    'arch=risc-v',
    '\x2f9eedb889a1af4e0cf5a76d29cf12b3')
ON CONFLICT
    DO NOTHING;

-- All trace_ids that match a particular key=value.
SELECT
    encode(trace_id, 'hex')
FROM
    Postings
WHERE
    key_value IN ('config=8888', 'config=565')
    AND tile_number = 0
ORDER BY
    trace_id ASC;

-- Retrieve matching values.
SELECT
    encode(trace_id, 'hex'),
    commit_number,
    val
FROM
    TraceValues
WHERE
    commit_number >= 0
    AND commit_number < 255
    AND trace_id IN ('\xfe385b159ff55dca481069805e5ff050', '\x277262a9236d571883d47dab102070bc');

-- Compound queries.
SELECT
    encode(trace_id, 'hex')
FROM
    Postings
WHERE
    key_value IN ('config=8888', 'config=565')
    AND tile_number = 0
INTERSECT
SELECT
    encode(trace_id, 'hex')
FROM
    Postings
WHERE
    key_value IN ('arch=x86')
    AND tile_number = 0;

-- ParamSet for a tile.
SELECT
    param_key,
    param_value
FROM
    ParamSets
WHERE
    tile_number = 1;

-- Most recent tile.
SELECT
    tile_number
FROM
    ParamSets @by_tile_number
ORDER BY
    tile_number DESC
LIMIT 1;

-- GetSource by trace_id.
SELECT
    SourceFiles.source_file
FROM
    TraceValues
    INNER LOOKUP
    JOIN SourceFiles ON TraceValues.source_file_id = SourceFiles.source_file_id
WHERE
    TraceValues.trace_id = '\xfe385b159ff55dca481069805e5ff050'
    AND TraceValues.commit_number = 1;

-- Count the number of matches to a query.
SELECT
    count(*)
FROM (
    SELECT
        trace_id
    FROM
        Postings
    WHERE
        key_value IN ('config=8888', 'config=565')
        AND tile_number = 0
    INTERSECT
    SELECT
        trace_id
    FROM
        Postings
    WHERE
        key_value IN ('arch=x86')
        AND tile_number = 0);

-- The first part of every query is to pull in the trace names
-- and trace_ids that match the given query for the given tile.
SELECT
    key_value,
    trace_id
FROM
    Postings @by_trace_id
WHERE
    tile_number = 0
    AND trace_id IN (
        SELECT
            trace_id
        FROM
            Postings
        WHERE
            key_value IN ('config=8888', 'config=565')
            AND tile_number = 0
        INTERSECT
        SELECT
            trace_id
        FROM
            Postings
        WHERE
            key_value IN ('arch=x86')
            AND tile_number = 0);

-- Then query for trace values in batches.
SELECT
    trace_id,
    commit_number,
    val
FROM
    TraceValues
WHERE
    tracevalues.commit_number >= 0
    AND tracevalues.commit_number < 256
    AND tracevalues.trace_id IN ('\xfe385b159ff55dca481069805e5ff050', '\x277262a9236d571883d47dab102070bc');

-- GetLastNSources
SELECT
    SourceFiles.source_file
FROM
    TraceValues @primary
    INNER LOOKUP
    JOIN SourceFiles @primary ON TraceValues.source_file_id = SourceFiles.source_file_id
WHERE
    TraceValues.trace_id = '\xfe385b159ff55dca481069805e5ff050'
ORDER BY
    TraceValues.commit_number DESC
LIMIT 5;

-- GetTraceIDsBySource
-- Note that this requires the tile_number, which we won't have,
-- unless GetLastNSources also returns the commit_number.
SELECT
    Postings.key_value,
    Postings.trace_id
FROM
    SourceFiles @by_source_file
    INNER LOOKUP
    JOIN TraceValues @by_source_file_id ON TraceValues.source_file_id = SourceFiles.source_file_id
    INNER LOOKUP
    JOIN Postings @by_trace_id ON TraceValues.trace_id = Postings.trace_id
WHERE
    SourceFiles.source_file = 'gs://perf-bucket/2020/02/08/11/testdata.json'
    AND Postings.tile_number = 0
ORDER BY
    Postings.trace_id;

-- Faster queries. Doing the merge will be faster with a smaller number of
-- traces returned. They idea is to first do count(*) queries for each
-- key=[values] part of a query, and use the smallest response to narrow down
-- the number of trace ids to a small number.
--
-- Then issue the queries for all the rest of the key=[values], but now also
-- include a restriction for the trace_ids.
SELECT
    count(*)
FROM
    Postings
WHERE
    tile_number = 0
    AND key_value = 'arch=x86';

-- Now that we've determined arch=x86 to return the smallest number of traces we
-- retrieve all those trace_id's.
SELECT
    encode(trace_id, 'hex')
FROM
    Postings
WHERE
    tile_number = 0
    AND key_value = 'arch=x86';

-- Now use those trace_id's to constrain the other queries.
--
-- In practice don't bother with the IN clause if the number of trace_id's
-- returned from the count(*) are too large, maybe > 1,000 as a cutoff?
SELECT
    encode(trace_id, 'hex')
FROM
    Postings
WHERE
    tile_number = 0
    AND key_value = 'config=565'
    AND trace_id IN ('\xfe385b159ff55dca481069805e5ff050', '\x277262a9236d571883d47dab102070bc');

-- The COUNT(*) is only useful if it's less than or equal to
-- countOptimizationThreshold, so limit the query to that many results. You
-- can't directly apply the LIMIT to the SELECT with the count(*) clause because
-- that always returns just one row.
SELECT
    count(*)
FROM (
    SELECT
        *
    FROM
        Postings
    WHERE
        tile_number = 0
        AND key_value = 'arch=x86'
    LIMIT 1);

