blob: c977f45c631ebf3a5d5501a985ac24016d678926 [file] [log] [blame]
/*
* Copyright 2016 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can
* be found in the LICENSE file.
*
*/
#ifndef SKC_ONCE_TILE
#define SKC_ONCE_TILE
//
//
//
#include "macros.h"
//
// Hard requirements:
//
// - A TTXB "block pool" extent that is at least 1GB.
//
// - A virtual surface of at least 8K x 8K
//
// - A physical surface of __don't really care__ because it's
// advantageous to tile the physical surface since it's likely
// to shrink the post-place TTCK sorting step.
//
//
// EXTENT TTXB BITS
// SIZE (MB) +------------------------------------+
// | 22 23 24 25 26 27 |
// +----+------------------------------------+
// | 8 | 128 256 512 1024 2048 4096 |
// TTXB | 16 | 256 512 1024 2048 4096 8192 |
// WORDS | 32 | 512 1024 2048 4096 8192 16384 |
// | 64 | 1024 2048 4096 8192 16384 32768 |
// +----+------------------------------------+
//
//
// SURF X/Y BITS
// TILE +------------------------------------------------------+
// | 5 6 7 8 9 10 11 12 13 |
// +----+------------------------------------------------------+
// | 3 | 256 512 1024 2048 4096 8192 16384 32768 65536 |
// TILE | 4 | 512 1024 2048 4096 8192 16384 32768 65536 128K |
// SIDE | 5 | 1024 2048 4096 8192 16384 32768 65536 128K 256K |
// BITS | 6 | 2048 4096 8192 16384 32768 65536 128K 256K 512K |
// | 7 | 4096 8192 16384 32768 65536 128K 256K 512K 1024K |
// +----+------------------------------------------------------+
// TILES^2 | 1024 4096 16384 65536 256K 1M 4M 16M 64M |
// +------------------------------------------------------+
//
// The following values should be pretty future-proof across all GPUs:
//
// - The minimum addressable subblock size is 16 words (64 bytes) to
// ensure there is enough space for a path or raster header and
// its payload.
//
// - Blocks are power-of-2 multiples of subblocks. Larger blocks can
// reduce allocation activity (fewer atomic adds).
//
// - 27 bits of TTXB_ID space implies a max of 4GB-32GB of
// rasterized paths depending on the size of the TTXB block.
// This could enable interesting use cases.
//
// - A virtual rasterization surface that's from +/-16K to +/-128K
// depending on the size of the TTXB block.
//
// - Keys that (optionally) only require a 32-bit high word
// comparison.
//
// - Support for a minimum of 256K layers. This can be practically
// raised to 1m or 2m layers.
//
//
// TTRK (64-bit COMPARE)
//
// 0 63
// | TTSB ID | X | Y | RASTER COHORT ID |
// +---------+------+------+------------------+
// | 27 | 12 | 12 | 13 |
//
//
// TTRK (32-BIT COMPARE) ( DEFAULT )
//
// 0 63
// | TTSB ID | N/A | X | Y | RASTER COHORT ID |
// +---------+-----+------+------+------------------+
// | 27 | 5 | 12 | 12 | 8 |
//
//
// TTSK v1
//
// 0 63
// | TTSB ID | IS_PREFIX | N/A | X | Y |
// +---------+-----------+------+----+----+
// | 27 | 1 (=0) | 12 | 12 | 12 |
//
//
// TTPK v2 ( DEFAULT )
//
// 0 63
// | TTPB ID | IS_PREFIX | SPAN | X | Y |
// +---------+-----------+------+-----+-----+
// | 27 | 1 (=1) | 12 | 12 | 12 |
//
//
// TTCK (32-BIT COMPARE) v1
//
// 0 63
// | PAYLOAD/TTSB/TTPB ID | PREFIX | ESCAPE | LAYER | X | Y |
// +----------------------+--------+--------+-------+-----+-----+
// | 30 | 1 | 1 | 18 | 7 | 7 |
//
//
// TTCK (32-BIT COMPARE) v2
//
// 0 63
// | PAYLOAD/TTSB/TTPB ID | PREFIX | ESCAPE | LAYER | X | Y |
// +----------------------+--------+--------+-------+-----+-----+
// | 30 | 1 | 1 | 15 | 9 | 8 |
//
//
// TTCK (64-BIT COMPARE) -- achieves 4K x 4K with an 8x16 tile ( DEFAULT )
//
// 0 63
// | PAYLOAD/TTSB/TTPB ID | PREFIX | ESCAPE | LAYER | X | Y |
// +----------------------+--------+--------+-------+-----+-----+
// | 27 | 1 | 1 | 18 | 9 | 8 |
//
//
//
//
#define SKC_SUBPIXEL_RESL_X_LOG2 5
#define SKC_SUBPIXEL_RESL_Y_LOG2 5
//
// FIXME -- COMMON -- HOIST ELSEWHERE
//
#define SKC_TILE_WIDTH (1 << SKC_TILE_WIDTH_LOG2)
#define SKC_TILE_HEIGHT (1 << SKC_TILE_HEIGHT_LOG2)
#define SKC_SUBPIXEL_RESL_X (1 << SKC_SUBPIXEL_RESL_X_LOG2)
#define SKC_SUBPIXEL_RESL_Y (1 << SKC_SUBPIXEL_RESL_Y_LOG2)
//
// PLATFORM SURFACE TILE SIZE
//
#define SKC_TILE_WIDTH_MASK SKC_BITS_TO_MASK(SKC_TILE_WIDTH_LOG2)
#define SKC_TILE_HEIGHT_MASK SKC_BITS_TO_MASK(SKC_TILE_HEIGHT_LOG2)
//
// TILE SUBPIXEL RESOLUTION
//
#define SKC_SUBPIXEL_RESL_X (1 << SKC_SUBPIXEL_RESL_X_LOG2)
#define SKC_SUBPIXEL_RESL_Y (1 << SKC_SUBPIXEL_RESL_Y_LOG2)
#define SKC_SUBPIXEL_MASK_X SKC_BITS_TO_MASK(SKC_SUBPIXEL_RESL_X_LOG2)
#define SKC_SUBPIXEL_MASK_Y SKC_BITS_TO_MASK(SKC_SUBPIXEL_RESL_Y_LOG2)
#define SKC_SUBPIXEL_RESL_X_F32 ((float)(SKC_SUBPIXEL_RESL_X))
#define SKC_SUBPIXEL_RESL_Y_F32 ((float)(SKC_SUBPIXEL_RESL_Y))
#define SKC_SUBPIXEL_X_SCALE_UP SKC_SUBPIXEL_RESL_X_F32
#define SKC_SUBPIXEL_Y_SCALE_UP SKC_SUBPIXEL_RESL_Y_F32
#define SKC_SUBPIXEL_X_SCALE_DOWN (1.0f / SKC_SUBPIXEL_RESL_X_F32)
#define SKC_SUBPIXEL_Y_SCALE_DOWN (1.0f / SKC_SUBPIXEL_RESL_Y_F32)
//
// SUBTILE RESOLUTION
//
#define SKC_SUBTILE_RESL_X_LOG2 (SKC_TILE_WIDTH_LOG2 + SKC_SUBPIXEL_RESL_X_LOG2)
#define SKC_SUBTILE_RESL_Y_LOG2 (SKC_TILE_HEIGHT_LOG2 + SKC_SUBPIXEL_RESL_Y_LOG2)
#define SKC_SUBTILE_RESL_X (1 << SKC_SUBTILE_RESL_X_LOG2)
#define SKC_SUBTILE_RESL_Y (1 << SKC_SUBTILE_RESL_Y_LOG2)
#define SKC_SUBTILE_MASK_X SKC_BITS_TO_MASK(SKC_SUBTILE_RESL_X_LOG2)
#define SKC_SUBTILE_MASK_Y SKC_BITS_TO_MASK(SKC_SUBTILE_RESL_Y_LOG2)
#define SKC_SUBTILE_RESL_X_F32 ((float)(SKC_SUBTILE_RESL_X))
#define SKC_SUBTILE_RESL_Y_F32 ((float)(SKC_SUBTILE_RESL_Y))
#define SKC_SUBTILE_X_SCALE_DOWN (1.0f / SKC_SUBTILE_RESL_X_F32)
#define SKC_SUBTILE_Y_SCALE_DOWN (1.0f / SKC_SUBTILE_RESL_Y_F32)
//
//
//
#define SKC_TILE_X_OFFSET_U32 (1 << (SKC_TTSK_BITS_X-1))
#define SKC_TILE_X_SPAN_U32 (1 << (SKC_TTSK_BITS_X)) // exclusive
#define SKC_TILE_Y_OFFSET_U32 (1 << (SKC_TTSK_BITS_Y-1))
#define SKC_TILE_Y_SPAN_U32 (1 << (SKC_TTSK_BITS_Y)) // exclusive
#define SKC_TILE_X_OFFSET_F32 0 // ((float)SKC_TILE_X_OFFSET_U32)
#define SKC_TILE_X_SPAN_F32 ((float)SKC_TILE_X_SPAN_U32)
#define SKC_TILE_Y_OFFSET_F32 0 // ((float)SKC_TILE_Y_OFFSET_U32)
#define SKC_TILE_Y_SPAN_F32 ((float)SKC_TILE_Y_SPAN_U32)
//
// TILE TRACE SUBPIXEL, PREFIX & COMPOSITION KEYS
//
// These keys are are purposefully 64-bits so they can be sorted with
// Hotsort's 32:32 or 64-bit implementation.
//
// Tiles are 32x32 on CUDA but can be made rectangular or smaller to
// fit other architectures.
//
// TW : tile width
// TH : tile height
//
// TTS : tile trace subpixel
// TTSB : tile trace subpixel block
// TTRK : tile trace subpixel key while in raster cohort
// TTSK : tile trace subpixel key
//
// TTP : tile trace prefix
// TTPB : tile trace prefix block
// TTPK : tile trace prefix key
//
// TTCK : tile trace composition key
//
//
// TILE TRACE SUBPIXEL
//
// The subpixels are encoded with either absolute tile coordinates
// (32-bits) or packed in delta-encoded form form.
//
// For 32-bit subpixel packing of a 32x32 or smaller tile:
//
// A tile X is encoded as:
//
// TX : 10 : unsigned min(x0,x1) tile subpixel coordinate.
//
// SX : 6 : unsigned subpixel span from min to max x with range
// [0,32]. The original direction is not captured. Would
// be nice to capture dx but not necessary right now but
// could be in the future. <--- SPARE VALUES AVAILABLE
//
// A tile Y is encoded as:
//
// TY : 10 : unsigned min(y0,y1) tile subpixel coordinate.
//
// DY : 6 : signed subpixel delta y1-y0. The range of delta is
// [-32,32] but horizontal lines are not encoded so [1,32]
// is mapped to [0,31]. The resulting range [-32,31] fits
// in 6 bits.
//
// TTS:
//
// 0 31
// | TX | SX | TY | DY |
// +-----+------+-----+------+
// | 10 | 6 | 10 | 6 |
//
#define SKC_TTS_BITS_TX 10
#define SKC_TTS_BITS_SX 6
#define SKC_TTS_BITS_TY 10
#define SKC_TTS_BITS_SY 6
//
//
//
#define SKC_TTS_INVALID ( SKC_UINT_MAX ) // relies on limited range of dx
//
//
//
#define SKC_TTS_OFFSET_SX (SKC_TTS_BITS_TX)
#define SKC_TTS_OFFSET_TY (SKC_TTS_BITS_TX + SKC_TTS_BITS_SX)
#define SKC_TTS_OFFSET_DY (SKC_TTS_BITS_TX + SKC_TTS_BITS_SX + SKC_TTS_BITS_TY)
#define SKC_TTS_MASK_TX SKC_BITS_TO_MASK(SKC_TTS_BITS_TX)
#define SKC_TTS_MASK_SX SKC_BITS_TO_MASK_AT(SKC_TTS_BITS_SX,SKC_TTS_OFFSET_SX)
#define SKC_TTS_MASK_TY SKC_BITS_TO_MASK_AT(SKC_TTS_BITS_TY,SKC_TTS_OFFSET_TY)
#define SKC_TTS_MASK_TX_PIXEL SKC_BITS_TO_MASK_AT(SKC_TTS_BITS_TX-SKC_SUBPIXEL_RESL_X_LOG2, \
SKC_SUBPIXEL_RESL_X_LOG2)
#define SKC_TTS_MASK_TY_PIXEL SKC_BITS_TO_MASK_AT(SKC_TTS_BITS_TY-SKC_SUBPIXEL_RESL_Y_LOG2, \
SKC_TTS_OFFSET_TY+SKC_SUBPIXEL_RESL_Y_LOG2)
//
// TTRK (64-BIT COMPARE)
//
// 0 63
// | TTSB ID | X | Y | COHORT ID |
// +---------+------+------+-----------+
// | 27 | 12 | 12 | 13 |
//
//
// TTRK (32-BIT COMPARE)
//
// 0 63
// | TTSB ID | N/A | X | Y | COHORT ID |
// +---------+-----+------+------+-----------+
// | 27 | 5 | 12 | 12 | 8 |
//
//
// TTRK is sortable intermediate key format for TTSK
//
// We're going to use the 32-bit comparison version for now
//
//
// TTSK v2:
//
// 0 63
// | TTSB ID | PREFIX | N/A | X | Y |
// +---------+--------+------+----+----+
// | 27 | 1 (=0) | 12 | 12 | 12 |
//
//
// TTPK v2:
//
// 0 63
// | TTPB ID | PREFIX | SPAN | X | Y |
// +---------+--------+------+-----+-----+
// | 27 | 1 (=1) | 12 | 12 | 12 |
//
#define SKC_TTXK_LO_BITS_ID 27
#define SKC_TTXK_LO_BITS_PREFIX 1
#define SKC_TTXK_HI_BITS_Y 12
#define SKC_TTXK_HI_BITS_X 12
#define SKC_TTXK_BITS_SPAN 12
#define SKC_TTXK_HI_BITS_YX (SKC_TTXK_HI_BITS_Y + SKC_TTXK_HI_BITS_X)
#define SKC_TTRK_HI_MASK_X SKC_BITS_TO_MASK(SKC_TTXK_HI_BITS_X)
#define SKC_TTRK_HI_MASK_YX SKC_BITS_TO_MASK(SKC_TTXK_HI_BITS_YX)
#define SKC_TTRK_HI_BITS_COHORT 8
#define SKC_TTRK_LO_BITS_NA (32 - SKC_TTXK_LO_BITS_ID)
#define SKC_TTRK_HI_BITS_COHORT_Y (SKC_TTRK_HI_BITS_COHORT + SKC_TTXK_HI_BITS_Y)
#define SKC_TTRK_HI_OFFSET_COHORT (32 - SKC_TTRK_HI_BITS_COHORT)
#define SKC_TTRK_HI_MASK_COHORT SKC_BITS_TO_MASK_AT(SKC_TTRK_HI_BITS_COHORT,SKC_TTRK_HI_OFFSET_COHORT)
#define SKC_TTRK_HI_BITS_COHORT_YX (SKC_TTRK_HI_BITS_COHORT + SKC_TTXK_HI_BITS_Y + SKC_TTXK_HI_BITS_X)
#define SKC_TTXK_LO_BITS_ID_PREFIX (SKC_TTXK_LO_BITS_ID + SKC_TTXK_LO_BITS_PREFIX)
#define SKC_TTXK_LO_OFFSET_PREFIX SKC_TTXK_LO_BITS_ID
#define SKC_TTXK_LO_OFFSET_SPAN SKC_TTXK_LO_BITS_ID_PREFIX
#define SKC_TTXK_LO_BITS_SPAN (32 - SKC_TTXK_LO_BITS_ID_PREFIX)
#define SKC_TTXK_HI_BITS_SPAN (SKC_TTXK_BITS_SPAN - SKC_TTXK_LO_BITS_SPAN)
#define SKC_TTXK_LO_OFFSET_PREFIX SKC_TTXK_LO_BITS_ID
#define SKC_TTXK_LO_MASK_ID SKC_BITS_TO_MASK(SKC_TTXK_LO_BITS_ID)
#define SKC_TTXK_LO_MASK_PREFIX SKC_BITS_TO_MASK_AT(SKC_TTXK_LO_BITS_PREFIX,SKC_TTXK_LO_OFFSET_PREFIX)
#define SKC_TTXK_LO_MASK_ID_PREFIX SKC_BITS_TO_MASK(SKC_TTXK_LO_BITS_ID_PREFIX)
#define SKC_TTXK_HI_OFFSET_Y (32 - SKC_TTXK_HI_BITS_Y)
#define SKC_TTXK_HI_OFFSET_X (SKC_TTXK_HI_OFFSET_Y - SKC_TTXK_HI_BITS_X)
#define SKC_TTXK_HI_ONE_X (1u << SKC_TTXK_HI_OFFSET_X)
#define SKC_TTXK_HI_MASK_YX SKC_BITS_TO_MASK_AT(SKC_TTXK_HI_BITS_YX,SKC_TTXK_HI_OFFSET_X)
#define SKC_TTXK_HI_MASK_Y SKC_BITS_TO_MASK_AT(SKC_TTXK_HI_BITS_Y ,SKC_TTXK_HI_OFFSET_Y)
#define SKC_TTPK_LO_SHL_YX_SPAN (SKC_TTXK_LO_OFFSET_SPAN - SKC_TTXK_HI_OFFSET_X)
#define SKC_TTPK_HI_SHR_YX_SPAN (SKC_TTXK_HI_OFFSET_X + SKC_TTXK_LO_BITS_SPAN)
//
// TTCK (32-BIT COMPARE) v1 -- NOT USED:
//
// 0 63
// | PAYLOAD/TTSB/TTPB ID | PREFIX | ESCAPE | LAYER | X | Y |
// +----------------------+--------+--------+-------+-----+-----+
// | 30 | 1 | 1 | 18 | 7 | 7 |
//
//
// TTCK (32-BIT COMPARE) v2 -- NOT USED:
//
// 0 63
// | PAYLOAD/TTSB/TTPB ID | PREFIX | ESCAPE | LAYER | X | Y |
// +----------------------+--------+--------+-------+-----+-----+
// | 30 | 1 | 1 | 15 | 9 | 8 |
//
//
// TTCK (64-BIT COMPARE) -- achieves 4K x 4K with an 8x16 tile:
//
// 0 63
// | PAYLOAD/TTSB/TTPB ID | PREFIX | ESCAPE | LAYER | X | Y |
// +----------------------+--------+--------+-------+-----+-----+
// | 27 | 1 | 1 | 18 | 9 | 8 |
//
#define SKC_TTCK_BITS_LAYER 18
#define SKC_TTCK_LO_BITS_ID SKC_TTXK_LO_BITS_ID
#define SKC_TTCK_LO_OFFSET_ID 0
#define SKC_TTCK_LO_MASK_ID SKC_BITS_TO_MASK(SKC_TTCK_LO_BITS_ID)
#define SKC_TTCK_LO_BITS_PREFIX 1
#define SKC_TTCK_LO_OFFSET_PREFIX SKC_TTCK_LO_BITS_ID
#define SKC_TTCK_LO_MASK_PREFIX SKC_BITS_TO_MASK_AT(SKC_TTCK_LO_BITS_PREFIX,SKC_TTCK_LO_OFFSET_PREFIX)
#define SKC_TTCK_LO_BITS_ID_PREFIX (SKC_TTCK_LO_BITS_ID + SKC_TTCK_LO_BITS_PREFIX)
#define SKC_TTCK_LO_MASK_ID_PREFIX SKC_BITS_TO_MASK(SKC_TTCK_LO_BITS_ID_PREFIX)
#define SKC_TTCK_LO_BITS_ESCAPE 1
#define SKC_TTCK_LO_OFFSET_ESCAPE SKC_TTCK_LO_BITS_ID_PREFIX
#define SKC_TTCK_LO_MASK_ESCAPE SKC_BITS_TO_MASK_AT(SKC_TTCK_LO_BITS_ESCAPE,SKC_TTCK_LO_OFFSET_ESCAPE)
#define SKC_TTCK_LO_BITS_ID_PREFIX_ESCAPE (SKC_TTCK_LO_BITS_ID_PREFIX + SKC_TTCK_LO_BITS_ESCAPE)
#define SKC_TTCK_HI_OFFSET_Y 24
#define SKC_TTCK_HI_OFFSET_X 15
#define SKC_TTCK_HI_BITS_Y 8
#define SKC_TTCK_HI_BITS_X 9
#define SKC_TTCK_HI_BITS_YX (SKC_TTCK_HI_BITS_X + SKC_TTCK_HI_BITS_Y)
#define SKC_TTCK_HI_MASK_YX SKC_BITS_TO_MASK_AT(SKC_TTCK_HI_BITS_YX,SKC_TTCK_HI_OFFSET_X)
#define SKC_TTCK_HI_BITS_LAYER (32 - SKC_TTCK_HI_BITS_YX)
#define SKC_TTCK_HI_MASK_LAYER SKC_BITS_TO_MASK(SKC_TTCK_HI_BITS_LAYER)
#define SKC_TTCK_HI_SHR_LAYER (SKC_TTCK_HI_BITS_Y + SKC_TTCK_HI_BITS_X + SKC_TTCK_BITS_LAYER - 32)
#define SKC_TTCK_LO_BITS_LAYER (SKC_TTCK_BITS_LAYER - SKC_TTCK_HI_BITS_LAYER)
//
// TILE COORD
//
// 0 32
// | N/A | X | Y |
// +-----+-----+-----+
// | 8 | 12 | 12 |
//
//
// This simplifies the clip test in the place kernel.
//
union skc_tile_coord
{
skc_uint u32;
struct {
#if defined(__OPENCL_C_VERSION__)
skc_uint xy;
#else
skc_uint na0 : 32 - SKC_TTXK_HI_BITS_YX; // 8
skc_uint x : SKC_TTXK_HI_BITS_X; // 12
skc_uint y : SKC_TTXK_HI_BITS_Y; // 12
#endif
};
};
SKC_STATIC_ASSERT(sizeof(union skc_tile_coord) == sizeof(skc_uint));
//
//
//
union skc_tile_clip
{
skc_uint u32a2[2];
skc_uint2 u32v2;
struct {
union skc_tile_coord xy0; // lower left
union skc_tile_coord xy1; // upper right
};
};
SKC_STATIC_ASSERT(sizeof(union skc_tile_clip) == sizeof(skc_uint2));
//
//
//
#endif
//
//
//