blob: 4f705ea925a55e9067952cc8b8d8ecc1a21e057b [file] [log] [blame]
/*
* Copyright 2017 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can
* be found in the LICENSE file.
*
*/
//
//
//
#include <stdlib.h>
#include <stdio.h>
#include "hs/cl/hs_cl.h"
#include "common/cl/assert_cl.h"
#include "composition_cl_12.h"
#include "config_cl.h"
#include "context.h"
#include "raster.h"
#include "handle.h"
#include "runtime_cl_12.h"
#include "common.h"
#include "tile.h"
//
// TTCK (32-BIT COMPARE) v1:
//
// 0 63
// | PAYLOAD/TTSB/TTPB ID | PREFIX | ESCAPE | LAYER | X | Y |
// +----------------------+--------+--------+-------+-----+-----+
// | 30 | 1 | 1 | 18 | 7 | 7 |
//
//
// TTCK (32-BIT COMPARE) v2:
//
// 0 63
// | PAYLOAD/TTSB/TTPB ID | PREFIX | ESCAPE | LAYER | X | Y |
// +----------------------+--------+--------+-------+-----+-----+
// | 30 | 1 | 1 | 15 | 9 | 8 |
//
//
// TTCK (64-BIT COMPARE) -- achieves 4K x 4K with an 8x16 tile:
//
// 0 63
// | PAYLOAD/TTSB/TTPB ID | PREFIX | ESCAPE | LAYER | X | Y |
// +----------------------+--------+--------+-------+-----+-----+
// | 27 | 1 | 1 | 18 | 9 | 8 |
//
//
// A TTCK key is 64 bits wide.  The same bits are exposed through
// three overlapping anonymous-struct views:
//
//  - component view: payload id, PREFIX/ESCAPE bits, the LAYER id
//    split across the lo and hi 32-bit words, and the tile X/Y
//  - layer view: na0/na1 padding isolating the full LAYER field
//  - yx view: na2/na3 padding isolating the packed YX field in the
//    hi word
//
// Field widths come from the SKC_TTCK_* config macros -- see the
// layout diagrams above for the configured splits.
//
union skc_ttck
{
skc_ulong u64; // all 64 bits at once
skc_uint2 u32v2; // as a pair of 32-bit words
struct {
skc_uint id : SKC_TTCK_LO_BITS_ID; // payload/TTSB/TTPB block id
skc_uint prefix : SKC_TTCK_LO_BITS_PREFIX; // PREFIX bit (see diagrams)
skc_uint escape : SKC_TTCK_LO_BITS_ESCAPE; // ESCAPE bit (see diagrams)
skc_uint layer_lo : SKC_TTCK_LO_BITS_LAYER; // low bits of layer id
skc_uint layer_hi : SKC_TTCK_HI_BITS_LAYER; // high bits of layer id
skc_uint x : SKC_TTCK_HI_BITS_X; // tile x
skc_uint y : SKC_TTCK_HI_BITS_Y; // tile y
};
struct {
skc_ulong na0 : SKC_TTCK_LO_BITS_ID_PREFIX_ESCAPE; // skip id+prefix+escape
skc_ulong layer : SKC_TTCK_BITS_LAYER; // full layer id
skc_ulong na1 : SKC_TTCK_HI_BITS_YX; // skip yx
};
struct {
skc_uint na2; // skip entire lo word
skc_uint na3 : SKC_TTCK_HI_BITS_LAYER; // skip hi layer bits
skc_uint yx : SKC_TTCK_HI_BITS_YX; // packed y:x tile coordinate
};
};
//
// FIXME -- accept floats on host but convert to subpixel offsets
// before appending to command ring
//
// NOTE(review): both converters currently expand to the constant 0,
// so any tx/ty translations passed to place() are consumed but
// discarded until the FIXME above is implemented.
//
#define SKC_PLACE_CMD_TX_CONVERT(f) 0
#define SKC_PLACE_CMD_TY_CONVERT(f) 0
//
// COMPOSITION PLACE
//
// This is a snapshot of the host-side command queue.
//
// Note that the composition command extent could be implemented as
// either a mapped buffer or simply copied to an ephemeral extent.
//
// This implementation may vary between compute platforms.
//
struct skc_composition_place
{
struct skc_composition_impl * impl; // backpointer to the owning composition impl
cl_command_queue cq; // in-order cq acquired for this place launch
struct skc_extent_phw1g_tdrNs_snap cmds; // snapshot of the host-side place command ring
skc_subbuf_id_t id; // temp-allocator subbuf id for this struct itself
};
//
// Forward declarations
//
static
void
skc_composition_unseal_block(struct skc_composition_impl * const impl,
skc_bool const block);
//
//
//
//
// Composition release pfn -- drops one reference and, on the final
// release, unseals (blocking) and then disposes of every resource
// owned by the impl.
//
// NOTE(review): the ref_count decrement is not atomic -- presumably
// all retain/release calls occur on the host scheduler thread;
// confirm before calling from other threads.
//
static
void
skc_composition_pfn_release(struct skc_composition_impl * const impl)
{
if (--impl->composition->ref_count != 0)
return;
//
// otherwise, dispose of all resources
//
// the unsealed state is a safe state to dispose of resources
skc_composition_unseal_block(impl,true); // block
struct skc_runtime * const runtime = impl->runtime;
// free host composition
skc_runtime_host_perm_free(runtime,impl->composition);
// release the cq
skc_runtime_release_cq_in_order(runtime,impl->cq);
// release kernels
cl(ReleaseKernel(impl->kernels.place));
cl(ReleaseKernel(impl->kernels.segment));
// release extents
skc_extent_phw1g_tdrNs_free(runtime,&impl->cmds.extent);
skc_extent_phrw_free (runtime,&impl->saved.extent);
skc_extent_phr_pdrw_free (runtime,&impl->atomics);
skc_extent_pdrw_free (runtime,&impl->keys);
skc_extent_pdrw_free (runtime,&impl->offsets);
// free composition impl
skc_runtime_host_perm_free(runtime,impl);
}
//
//
//
//
// Grid dispose pfn for a place launch -- returns the cq and the
// snapshot to the runtime, frees the place struct, and drops the
// reference the snapshot held on the composition impl.
//
static
void
skc_composition_place_grid_pfn_dispose(skc_grid_t const grid)
{
  struct skc_composition_place * const place   = skc_grid_get_data(grid);
  struct skc_composition_impl  * const impl    = place->impl;
  struct skc_runtime           * const runtime = impl->runtime;

  // hand the in-order command queue back to the runtime
  skc_runtime_release_cq_in_order(runtime,place->cq);

  // free the command snapshot (may have been a mapped region or a copy)
  skc_extent_phw1g_tdrNs_snap_free(runtime,&place->cmds);

  // capture the subbuf id before the place struct is returned to the
  // temp allocator
  skc_subbuf_id_t const id = place->id;

  skc_runtime_host_temp_free(runtime,place,id);

  // finally, drop the reference held by this snapshot
  skc_composition_pfn_release(impl);
}
//
//
//
//
// Runs on the context scheduler once the post-place atomics
// read-back has landed on the host -- retires the place grid.
//
static
void
skc_composition_place_read_complete(skc_grid_t const grid)
{
skc_grid_complete(grid);
}
//
// OpenCL event callback fired when the atomics read-back completes.
// Does the minimum possible work on the CL callback thread: hands the
// completion off to the context command scheduler.
//
static
void
skc_composition_place_read_cb(cl_event event, cl_int status, skc_grid_t const grid)
{
  SKC_CL_CB(status);

  struct skc_composition_place * const place = skc_grid_get_data(grid);

  // as quickly as possible, enqueue the next pipeline stage on the
  // context command scheduler
  SKC_SCHEDULER_SCHEDULE(place->impl->runtime->scheduler,
                         skc_composition_place_read_complete,
                         grid);
}
//
// Grid execute pfn for a PLACE launch -- binds kernel args, enqueues
// the PLACE kernel over the snapshot's command count, then enqueues a
// read-back of the device atomics whose completion callback advances
// the pipeline.
//
static
void
skc_composition_place_grid_pfn_execute(skc_grid_t const grid)
{
//
// FILLS EXPAND
//
// need result of cmd counts before launching RASTERIZE grids
//
// - OpenCL 1.2: copy atomic counters back to host and launch RASTERIZE grids from host
// - OpenCL 2.x: have a kernel size and launch RASTERIZE grids from device
// - or launch a device-wide grid that feeds itself but that's unsatisfying
//
struct skc_composition_place * const place = skc_grid_get_data(grid);
struct skc_composition_impl * const impl = place->impl;
struct skc_runtime * const runtime = impl->runtime;
// number of commands captured by this snapshot
skc_uint const work_size = skc_extent_ring_snap_count(place->cmds.snap);
// maximal clip rectangle -- i.e. no clipping
skc_uint4 const clip = { 0, 0, SKC_UINT_MAX, SKC_UINT_MAX };
// initialize kernel args -- arg order must match the PLACE kernel signature
cl(SetKernelArg(impl->kernels.place,0,SKC_CL_ARG(impl->runtime->block_pool.blocks.drw)));
cl(SetKernelArg(impl->kernels.place,1,SKC_CL_ARG(impl->atomics.drw)));
cl(SetKernelArg(impl->kernels.place,2,SKC_CL_ARG(impl->keys.drw)));
cl(SetKernelArg(impl->kernels.place,3,SKC_CL_ARG(place->cmds.drN)));
cl(SetKernelArg(impl->kernels.place,4,SKC_CL_ARG(runtime->handle_pool.map.drw)));
cl(SetKernelArg(impl->kernels.place,5,SKC_CL_ARG(clip))); // FIXME -- convert the clip to yx0/yx1 format
cl(SetKernelArg(impl->kernels.place,6,SKC_CL_ARG(work_size)));
// launch kernel
skc_device_enqueue_kernel(runtime->device,
SKC_DEVICE_KERNEL_ID_PLACE,
place->cq,
impl->kernels.place,
work_size,
0,NULL,NULL);
//
// copy atomics back after every place launch
//
cl_event complete;
skc_extent_phr_pdrw_read(&impl->atomics,place->cq,&complete);
// register completion callback, then release our reference -- the CL
// runtime keeps the event alive until the callback has fired
cl(SetEventCallback(complete,CL_COMPLETE,skc_composition_place_read_cb,grid));
cl(ReleaseEvent(complete));
// flush command queue
cl(Flush(place->cq));
}
//
//
//
//
// Takes a snapshot of the host-side place command ring and launches
// an asynchronous PLACE grid to process it.  Retains the composition
// for the snapshot's lifetime -- released in the grid dispose pfn.
//
static
void
skc_composition_snap(struct skc_composition_impl * const impl)
{
skc_composition_retain(impl->composition);
skc_subbuf_id_t id;
struct skc_composition_place * const place = skc_runtime_host_temp_alloc(impl->runtime,
SKC_MEM_FLAGS_READ_WRITE,
sizeof(*place),&id,NULL);
// save the subbuf id
place->id = id;
// save backpointer
place->impl = impl;
// set grid data
skc_grid_set_data(impl->grids.place,place);
// acquire command queue
place->cq = skc_runtime_acquire_cq_in_order(impl->runtime);
// checkpoint the ring
skc_extent_ring_checkpoint(&impl->cmds.ring);
// make a snapshot
skc_extent_phw1g_tdrNs_snap_init(impl->runtime,&impl->cmds.ring,&place->cmds);
// allocate the snapshot's device extent (could be a mapped region or a copy)
skc_extent_phw1g_tdrNs_snap_alloc(impl->runtime,
&impl->cmds.extent,
&place->cmds,
place->cq,
NULL);
// force the place grid to start executing
skc_grid_force(impl->grids.place);
}
//
//
//
//
// Composition seal pfn -- moves the composition toward the SEALED
// state: flushes any work-in-progress place commands and then force
// starts the sort grid.  Sealing completes asynchronously (see
// skc_composition_sort_execute_complete).
//
static
void
skc_composition_pfn_seal(struct skc_composition_impl * const impl)
{
// return if sealing or sealed
if (impl->state >= SKC_COMPOSITION_STATE_SEALING)
return;
struct skc_runtime * const runtime = impl->runtime;
struct skc_scheduler * const scheduler = runtime->scheduler;
//
// otherwise, wait for UNSEALING > UNSEALED transition
//
if (impl->state == SKC_COMPOSITION_STATE_UNSEALING)
{
SKC_SCHEDULER_WAIT_WHILE(scheduler,impl->state != SKC_COMPOSITION_STATE_UNSEALED);
}
else // or we were already unsealed
{
// flush if there is work in progress
skc_uint const count = skc_extent_ring_wip_count(&impl->cmds.ring);
if (count > 0) {
skc_composition_snap(impl);
}
}
//
// now unsealed so we need to start sealing...
//
impl->state = SKC_COMPOSITION_STATE_SEALING;
//
// the seal operation implies we should force start all dependencies
// that are still in a ready state
//
skc_grid_force(impl->grids.sort);
}
//
//
//
//
// Runs on the context scheduler once the post-sort atomics read-back
// completes -- marks the composition SEALED and retires the sort
// grid.
//
// NOTE(review): non-static -- presumably referenced from a header or
// sibling translation unit; confirm before narrowing linkage.
//
void
skc_composition_sort_execute_complete(struct skc_composition_impl * const impl)
{
// we're sealed
impl->state = SKC_COMPOSITION_STATE_SEALED;
// this grid is done
skc_grid_complete(impl->grids.sort);
}
//
// OpenCL event callback fired once the post-sort atomics read-back is
// complete.  Keeps CL-callback-thread work minimal by deferring to
// the context command scheduler.
//
static
void
skc_composition_sort_execute_cb(cl_event event, cl_int status, struct skc_composition_impl * const impl)
{
  SKC_CL_CB(status);

  struct skc_scheduler * const scheduler = impl->runtime->scheduler;

  // as quickly as possible, enqueue the next pipeline stage
  SKC_SCHEDULER_SCHEDULE(scheduler,skc_composition_sort_execute_complete,impl);
}
//
// Grid execute pfn for the sealing stage -- sorts the device-side
// TTCK key extent, launches the SEGMENT_TTCK kernel to find the start
// of each tile's key run, then reads the atomics back with a
// completion callback.
//
static
void
skc_composition_sort_grid_pfn_execute(skc_grid_t const grid)
{
struct skc_composition_impl * const impl = skc_grid_get_data(grid);
struct skc_runtime * const runtime = impl->runtime;
// we should be sealing
assert(impl->state == SKC_COMPOSITION_STATE_SEALING);
// host-readable atomics were read back by the final place launch
struct skc_place_atomics * const atomics = impl->atomics.hr;
#ifndef NDEBUG
// NOTE(review): debug-only trace of the key count on every sort
fprintf(stderr,"composition sort: %u\n",atomics->keys);
#endif
if (atomics->keys > 0)
{
uint32_t keys_padded_in, keys_padded_out;
// pad the key count to the sorter's required in/out sizes
hs_cl_pad(runtime->hs,atomics->keys,&keys_padded_in,&keys_padded_out);
// NOTE(review): impl->runtime->hs here vs runtime->hs above -- same
// pointer, just inconsistent style
hs_cl_sort(impl->runtime->hs,
impl->cq,
0,NULL,NULL,
impl->keys.drw,
NULL,
atomics->keys,
keys_padded_in,
keys_padded_out,
false);
cl(SetKernelArg(impl->kernels.segment,0,SKC_CL_ARG(impl->keys.drw)));
cl(SetKernelArg(impl->kernels.segment,1,SKC_CL_ARG(impl->offsets.drw)));
cl(SetKernelArg(impl->kernels.segment,2,SKC_CL_ARG(impl->atomics.drw)));
// find start of each tile
skc_device_enqueue_kernel(runtime->device,
SKC_DEVICE_KERNEL_ID_SEGMENT_TTCK,
impl->cq,
impl->kernels.segment,
atomics->keys,
0,NULL,NULL);
}
cl_event complete;
// next stage needs to know number of key segments
skc_extent_phr_pdrw_read(&impl->atomics,impl->cq,&complete);
// register a callback
cl(SetEventCallback(complete,CL_COMPLETE,skc_composition_sort_execute_cb,impl));
cl(ReleaseEvent(complete));
// flush cq
cl(Flush(impl->cq));
}
//
//
//
static
void
skc_composition_raster_release(struct skc_composition_impl * const impl)
{
//
// reference counts to rasters can only be released when the
// composition is unsealed and the atomics are reset.
//
skc_runtime_raster_device_release(impl->runtime,
impl->saved.extent.hrw,
impl->saved.count);
// reset count
impl->saved.count = 0;
}
//
//
//
//
// Transitions the composition to the UNSEALED state.  If 'block' is
// true, pumps the scheduler until in-flight unsealing/sealing work
// and any render locks have drained.  Finishes by attaching a fresh
// sort grid for the next seal cycle.
//
static
void
skc_composition_unseal_block(struct skc_composition_impl * const impl,
skc_bool const block)
{
// return if already unsealed
if (impl->state == SKC_COMPOSITION_STATE_UNSEALED)
return;
//
// otherwise, we're going to need to pump the scheduler
//
struct skc_scheduler * const scheduler = impl->runtime->scheduler;
//
// wait for UNSEALING > UNSEALED transition
//
if (impl->state == SKC_COMPOSITION_STATE_UNSEALING)
{
if (block) {
SKC_SCHEDULER_WAIT_WHILE(scheduler,impl->state != SKC_COMPOSITION_STATE_UNSEALED);
}
// non-blocking callers return while another unseal is in flight
return;
}
//
// wait for SEALING > SEALED transition ...
//
if (impl->state == SKC_COMPOSITION_STATE_SEALING)
{
// wait if sealing
SKC_SCHEDULER_WAIT_WHILE(scheduler,impl->state != SKC_COMPOSITION_STATE_SEALED);
}
// wait for rendering locks to be released
SKC_SCHEDULER_WAIT_WHILE(scheduler,impl->lock_count > 0);
//
// no need to visit UNSEALING state with this implementation
//
// acquire a new grid
impl->grids.sort = SKC_GRID_DEPS_ATTACH(impl->runtime->deps,
NULL, // the composition state guards this
impl,
NULL, // no waiting
skc_composition_sort_grid_pfn_execute,
NULL); // no dispose
// mark composition as unsealed
impl->state = SKC_COMPOSITION_STATE_UNSEALED;
}
//
// can only be called on a composition that was just unsealed
//
static
void
skc_composition_reset(struct skc_composition_impl * const impl)
{
// zero the atomics
skc_extent_phr_pdrw_zero(&impl->atomics,impl->cq,NULL);
// flush it
cl(Flush(impl->cq));
// release all the rasters
skc_composition_raster_release(impl);
}
//
// Convenience wrapper: unseal (optionally blocking) and then
// optionally reset the composition.
//
static
void
skc_composition_unseal_block_reset(struct skc_composition_impl * const impl,
                                   skc_bool                      const block,
                                   skc_bool                      const reset)
{
  // unseal first...
  skc_composition_unseal_block(impl,block);

  // ...then optionally zero the atomics and release saved rasters
  if (reset)
    skc_composition_reset(impl);
}
//
//
//
//
// Composition unseal pfn -- non-blocking unseal, optionally resetting
// the atomics and releasing the saved rasters.
//
static
void
skc_composition_pfn_unseal(struct skc_composition_impl * const impl, skc_bool const reset)
{
skc_composition_unseal_block_reset(impl,false,reset);
}
//
// only needs to create a grid
//
//
// Attaches a new place grid (with an execute and a dispose pfn) and
// declares that the sort grid happens after it -- so sealing waits
// for all outstanding place launches.
//
static
void
skc_composition_place_create(struct skc_composition_impl * const impl)
{
// acquire a grid
impl->grids.place = SKC_GRID_DEPS_ATTACH(impl->runtime->deps,
&impl->grids.place,
NULL,
NULL, // no waiting
skc_composition_place_grid_pfn_execute,
skc_composition_place_grid_pfn_dispose);
// assign happens-after relationship
skc_grid_happens_after_grid(impl->grids.sort,impl->grids.place);
}
//
// Composition place pfn -- validates and retains 'count' raster
// handles, then copies one place command per raster into the host
// command ring, declaring a happens-after dependency between the
// place grid and each raster.  Blocks (pumping the scheduler) while
// the ring's snap is full, snapping whenever the wip interval fills.
//
// rasters/layer_ids are arrays of 'count' elements; txs/tys are
// optional per-raster translations (either or both may be NULL).
//
// NOTE(review): the tx/ty converters currently evaluate to 0, so
// translations are consumed but discarded -- see the FIXME at the
// SKC_PLACE_CMD_*_CONVERT macros.
//
// Returns SKC_ERR_SUCCESS, or the validation error if any handle is
// not a valid raster.
//
static
skc_err
skc_composition_pfn_place(struct skc_composition_impl * const impl,
skc_raster_t const * rasters,
skc_layer_id const * layer_ids,
skc_float const * txs,
skc_float const * tys,
skc_uint count)
{
// block and yield if not unsealed
skc_composition_unseal_block(impl,true);
//
// validate and retain all rasters
//
skc_err err;
err = skc_runtime_handle_device_validate_retain(impl->runtime,
SKC_TYPED_HANDLE_TYPE_IS_RASTER,
rasters,
count);
if (err)
return err;
// NOTE(review): a second retain follows validate_retain --
// presumably host- vs device-side reference counts; confirm
skc_runtime_handle_device_retain(impl->runtime,rasters,count);
//
// save the stripped handles
//
skc_raster_t * saved = impl->saved.extent.hrw;
saved += impl->saved.count;
impl->saved.count += count;
for (skc_uint ii=0; ii<count; ii++) {
saved[ii] = SKC_TYPED_HANDLE_TO_HANDLE(*rasters++);
}
//
// - declare the place grid happens after the raster
// - copy place commands into ring
//
do {
skc_uint rem;
// find out how much room is left in the ring's snap
// if the place ring is full -- let it drain
SKC_SCHEDULER_WAIT_WHILE(impl->runtime->scheduler,(rem = skc_extent_ring_wip_rem(&impl->cmds.ring)) == 0);
// append commands
skc_uint avail = min(rem,count);
// decrement count
count -= avail;
// launch a place kernel after copying commands?
skc_bool const is_wip_full = (avail == rem);
// if there is no place grid then create one
if (impl->grids.place == NULL)
{
skc_composition_place_create(impl);
}
//
// FIXME -- OPTIMIZATION? -- the ring_wip_index_inc() test can
// be avoided by splitting into at most two intervals. It should
// be plenty fast as is though so leave for now.
//
// four specializations below avoid touching NULL tx/ty arrays
union skc_cmd_place * const cmds = impl->cmds.extent.hw1;
if ((txs == NULL) && (tys == NULL))
{
while (avail-- > 0)
{
skc_raster_t const raster = *saved++;
skc_grid_happens_after_handle(impl->grids.place,raster);
cmds[skc_extent_ring_wip_index_inc(&impl->cmds.ring)] =
(union skc_cmd_place){ raster, *layer_ids++, 0, 0 };
}
}
else if (txs == NULL)
{
while (avail-- > 0)
{
skc_raster_t const raster = *saved++;
skc_grid_happens_after_handle(impl->grids.place,raster);
cmds[skc_extent_ring_wip_index_inc(&impl->cmds.ring)] =
(union skc_cmd_place){ raster,
*layer_ids++,
0,
SKC_PLACE_CMD_TY_CONVERT(*tys++) };
}
}
else if (tys == NULL)
{
while (avail-- > 0)
{
skc_raster_t const raster = *saved++;
skc_grid_happens_after_handle(impl->grids.place,raster);
cmds[skc_extent_ring_wip_index_inc(&impl->cmds.ring)] =
(union skc_cmd_place){ raster,
*layer_ids++,
SKC_PLACE_CMD_TX_CONVERT(*txs++),
0 };
}
}
else
{
while (avail-- > 0)
{
skc_raster_t const raster = *saved++;
skc_grid_happens_after_handle(impl->grids.place,raster);
cmds[skc_extent_ring_wip_index_inc(&impl->cmds.ring)] =
(union skc_cmd_place){ raster,
*layer_ids++,
SKC_PLACE_CMD_TX_CONVERT(*txs++),
SKC_PLACE_CMD_TY_CONVERT(*tys++) };
}
}
// launch place kernel?
if (is_wip_full) {
skc_composition_snap(impl);
}
} while (count > 0);
return SKC_ERR_SUCCESS;
}
//
//
//
//
// Composition bounds pfn.
//
// FIXME -- not yet implemented -- real bounds will be copied back
// after sealing; until then report a maximal (unbounded) region.
//
static
void
skc_composition_pfn_bounds(struct skc_composition_impl * const impl, skc_int bounds[4])
{
  bounds[0] = bounds[1] = SKC_INT_MIN;
  bounds[2] = bounds[3] = SKC_INT_MAX;
}
//
//
//
//
// Takes a reference on the composition and then a render lock on its
// impl -- the lock keeps the composition from being unsealed while a
// render is in flight.
//
void
skc_composition_retain_and_lock(struct skc_composition * const composition)
{
  skc_composition_retain(composition);

  composition->impl->lock_count += 1;
}
void
skc_composition_unlock_and_release(struct skc_composition * const composition)
{
composition->impl->lock_count -= 1;
skc_composition_pfn_release(composition->impl);
}
//
//
//
//
// Creates an OpenCL 1.2 composition: allocates the public composition
// struct and its impl, wires up the pfn table, acquires a cq and the
// PLACE/SEGMENT_TTCK kernels, sizes the ring and extents from the
// runtime config, and leaves the composition unsealed with zeroed
// atomics.  Always returns SKC_ERR_SUCCESS.
//
skc_err
skc_composition_cl_12_create(struct skc_context * const context,
struct skc_composition * * const composition)
{
struct skc_runtime * const runtime = context->runtime;
// retain the context
// skc_context_retain(context);
// allocate impl
struct skc_composition_impl * const impl = skc_runtime_host_perm_alloc(runtime,SKC_MEM_FLAGS_READ_WRITE,sizeof(*impl));
// allocate composition
(*composition) = skc_runtime_host_perm_alloc(runtime,SKC_MEM_FLAGS_READ_WRITE,sizeof(**composition));
(*composition)->context = context;
(*composition)->impl = impl;
(*composition)->ref_count = 1;
// pfn table -- the public API dispatches through these
(*composition)->place = skc_composition_pfn_place;
(*composition)->unseal = skc_composition_pfn_unseal;
(*composition)->seal = skc_composition_pfn_seal;
(*composition)->bounds = skc_composition_pfn_bounds;
(*composition)->release = skc_composition_pfn_release;
// initialize impl
impl->composition = (*composition);
impl->runtime = runtime;
SKC_ASSERT_STATE_INIT(impl,SKC_COMPOSITION_STATE_SEALED);
impl->lock_count = 0;
impl->grids.sort = NULL;
impl->grids.place = NULL;
// acquire command queue for sealing/unsealing
impl->cq = skc_runtime_acquire_cq_in_order(runtime);
// acquire kernels
impl->kernels.place = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_PLACE);
impl->kernels.segment = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_SEGMENT_TTCK);
// get config
struct skc_config const * const config = runtime->config;
// initialize ring size with config values
skc_extent_ring_init(&impl->cmds.ring,
config->composition.cmds.elem_count,
config->composition.cmds.snap_count,
sizeof(union skc_cmd_place));
skc_extent_phw1g_tdrNs_alloc(runtime,&impl->cmds.extent ,sizeof(union skc_cmd_place) * config->composition.cmds.elem_count);
skc_extent_phrw_alloc (runtime,&impl->saved.extent,sizeof(skc_raster_t) * config->composition.raster_ids.elem_count);
skc_extent_phr_pdrw_alloc (runtime,&impl->atomics ,sizeof(struct skc_place_atomics));
skc_extent_pdrw_alloc (runtime,&impl->keys ,sizeof(skc_ttxk_t) * config->composition.keys.elem_count);
skc_extent_pdrw_alloc (runtime,&impl->offsets ,sizeof(skc_uint) * (1u << SKC_TTCK_HI_BITS_YX)); // 1MB
// nothing saved
impl->saved.count = 0;
// unseal the composition, zero the atomics, etc.
skc_composition_unseal_block_reset(impl,false,true);
return SKC_ERR_SUCCESS;
}
//
//
//