| /* |
| * Copyright 2017 Google Inc. |
| * |
| * Use of this source code is governed by a BSD-style license that can |
| * be found in the LICENSE file. |
| * |
| */ |
| |
| // |
| // |
| // |
| |
| // get rid of these |
| #include <stdio.h> |
| #include <stdlib.h> |
| |
| // |
| // |
| // |
| |
| #include "hs/cl/hs_cl.h" |
| |
| #include "common/cl/assert_cl.h" |
| |
| #include "context.h" |
| #include "grid.h" |
| #include "raster.h" |
| #include "extent_ring.h" |
| #include "raster_builder.h" |
| |
| #include "tile.h" |
| |
| #include "config_cl.h" |
| #include "runtime_cl_12.h" |
| #include "extent_cl_12.h" |
| #include "raster_builder_cl_12.h" |
| |
| // |
| // RASTERIZATION SUB-PIPELINE |
| // -------------------------- |
| // |
| // Phase 1: expand commands |
| // |
| // Phase 2: rasterize |
| // |
| // Phase 3: sort & segment || release paths |
| // |
| // Phase 4: prefix |
| // |
| // Phase 5: release rasters |
| // |
| // RASTER COHORT |
| // ============== |
| // |
| // BUILDER RASTERIZER POST PROCESSING |
| // <-----------------------------------------------> <------------> <---------------------------------------------------------------------> |
| // |
| // fill cmds transforms raster clips path release rasterize cmds cohort map raster release TTSB TTSK cohort atomics context atomics |
| // --------- ---------- ------------ ------------ -------------- ---------- -------------- ---- ---- -------------- --------------- |
| // 1,2 1,2 1,2 1,2 2 1-4 1,2,3,4 2-4 2-4 2-4 global |
| // |
| // |
| // NOTES: FINE-GRAINED SVM |
| // ----------------------- |
| // |
| // 1) In a fine-grained system we know the exact number of |
| // rasterize cmds per segment type before phase 1 |
| // |
| // 2) A raster that's "under construction" shouldn't be rasterized |
| // until it is complete. This implies that a raster is not part |
| // of a cohort until it is complete. The raster builder must |
| // handle raster promises being "forced" to completion -- this is |
| // likely the result of composition construction and subsequent |
| // rendering to a surface. |
| // |
| // 3) The raster cohort rasterizer state retains the fill cmd, |
| // transform, raster clip and path release "ring" extents. |
| // |
| // 4) The rasterize cmd extent sizes (line, quad, cubic, rational |
| // quad, rational cubic) are known ahead of time. |
| // |
| // 5) The raster cohort post processor is standalone and retains the |
| // raster_map, cohort atomics, TTSK_RYX extent, and raster |
| // references until complete. |
| // |
| |
| // |
| // Notes: |
| // |
| // - Could have a pipeline stage before expansion count the exact |
| // number of line/quad/cubic commands but the command buffers are |
| // relatively small (64-bit commands * # of path segments). |
| // |
| |
| // raster |
| // cohort atomics path_ids raster_ids transforms clips cmds_fill cmds_l/q/c ttsk_ryx |
| // |
| // |
| // BEGIN ^ |
| // | |
| // EXPAND | |
| // | |
| // RASTERIZE | |
| // | |
| // SORT || RELEASE PATHS | |
| // | |
| // PREFIX | |
| // | |
| // RELEASE RASTERS | |
| // | |
| // END v |
| // |
| // |
| // BEGIN |
| // |
| // EXPAND -- PRODUCES: one or more extents of rasterization commands |
| // |
| // RASTERIZE -- DEPENDENCY: requires size of command extents before launching |
| // -- PRODUCES: an extent of ttsk_ryx keys |
| // |
| // SORT || RELEASE PATHS -- DEPENDENCY: requires size of key extent before launching |
| // -- PRODUCES: sorted array of keys |
| // |
| // PREFIX -- DEPENDENCY: none -- can execute after SORT because grid size is number of rasters |
| // |
| // RELEASE RASTERS -- DEPENDENCY: none -- can execute after prefix |
| // |
| // END |
| // |
| |
| // ------------------------ |
| // |
| // DEPENDENCY is cleanly implemented with a host callback or device kernel launcher |
| // |
| // Can this hide resource acquisition? Yes. But there are two cases: |
| // |
// 1. acquisition of resources occurs on the host thread and lack of
| // resources drains the host command queue until resources are |
| // available (OpenCL 2.x) |
| // |
| // 2. the host commands lazily acquire resources (OpenCL 1.2) |
| // |
| // ------------------------ |
| // |
| // How to express? |
| // |
| // Each substage launches its successors. This supports both dependency models. |
| // |
| // If OpenCL 1.2 then the substage can't be launched until the prior |
| // stage's event is complete. So this requires registering a callback |
| // to invoke the substage. |
| // |
| // ------------------------ |
| |
| // |
| // BUILD |
| // |
| |
// platform-specific (OpenCL 1.2) implementation behind the public raster builder
struct skc_raster_builder_impl
{
  struct skc_raster_builder * raster_builder; // back-pointer to public builder (holds the refcount)
  struct skc_runtime        * runtime;        // owning runtime: allocators, scheduler, device

  // grid for the raster cohort currently under construction --
  // NULL until work is added (see pfn_add / pfn_end)
  skc_grid_t cohort;

  // these are all durable/perm extents -- freed in pfn_release
  struct skc_extent_phrwg_thr1s path_ids;   // read/write by host
  struct skc_extent_phw1g_tdrNs transforms; // write once by host + read by device
  struct skc_extent_phw1g_tdrNs clips;      // write once by host + read by device
  struct skc_extent_phw1g_tdrNs fill_cmds;  // write once by host + read by device
  struct skc_extent_phrwg_tdrNs raster_ids; // read/write by host + read by device

  // pre-created kernels for the rasterization sub-pipeline stages
  struct {
    cl_kernel fills_expand;
    cl_kernel rasterize_all;
    cl_kernel segment;
    cl_kernel rasters_alloc;
    cl_kernel prefix;
  } kernels;
};
| |
| // |
| // RASTER COHORT |
| // |
| // This sub-pipeline snapshots the raster builder and then acquires |
| // and releases host and device resources as necessary (as late as |
| // possible). |
| // |
| // Note that the cohort extents are ephemeral and are only used by one |
// or more stages of the rasterization sub-pipeline.
| // |
| // The pipeline implementation may vary between compute platforms. |
| // |
| |
// ephemeral state for one raster cohort as it moves through the
// rasterization sub-pipeline -- temp-allocated in pfn_waiting and
// freed in pfn_dispose
struct skc_raster_cohort
{
  struct skc_raster_builder_impl * impl; // back-pointer to the owning builder impl

  // snapshots of the builder's rings, captured when the grid enters
  // the waiting state
  struct skc_extent_phrwg_thr1s_snap path_ids;   // read/write by host
  struct skc_extent_phw1g_tdrNs_snap transforms; // write once by host + read by device
  struct skc_extent_phw1g_tdrNs_snap clips;      // write once by host + read by device
  struct skc_extent_phw1g_tdrNs_snap fill_cmds;  // write once by host + read by device
  struct skc_extent_phrwg_tdrNs_snap raster_ids; // read/write by host + read by device

  // in-order command queue held for this cohort's lifetime
  cl_command_queue cq;

  // sub-pipeline atomics
  struct skc_extent_thr_tdrw atomics;

  // path primitives are expanded into line/quad/cubic/rational cmds
  struct skc_extent_tdrw cmds;

  // rasterization output (TTRK keys)
  struct skc_extent_tdrw keys;
  // struct skc_extent_thrw_tdrw keys;

  // post-sort extent with metadata for each raster
  struct skc_extent_tdrw metas;
  // struct skc_extent_thrw_tdrw metas;

  // subbuf id of the temp allocation backing this struct
  skc_subbuf_id_t id;

  //
  // pipeline also uses the following global resources:
  //
  //   - command queue from global factory
  //   - global block pool and its atomics
  //   - global path and raster host id map
  //   - temporary host and device allocations
  //
};
| |
| // |
| // TTRK (64-BIT COMPARE) |
| // |
| // 0 63 |
| // | TTSB ID | X | Y | COHORT ID | |
| // +---------+------+------+-----------+ |
| // | 27 | 12 | 12 | 13 | |
| // |
| // |
| // TTRK (32-BIT COMPARE) |
| // |
| // 0 63 |
| // | TTSB ID | N/A | X | Y | COHORT ID | |
| // +---------+-----+------+------+-----------+ |
| // | 27 | 5 | 12 | 12 | 8 | |
| // |
| |
| // |
// TTRK is a sortable intermediate key format for TTSK
| // |
| // We're going to use the 32-bit comparison version for now |
| // |
| |
// sortable intermediate key -- field widths follow the TTRK diagrams above
union skc_ttrk
{
  skc_ulong  u64;   // whole key for 64-bit comparison sorts
  skc_uint2  u32v2; // lo/hi words for 32-bit comparison sorts

  // primary layout: block id | slack | tile x | tile y | cohort id
  struct {
    skc_uint block  : SKC_TTXK_LO_BITS_ID;
    skc_uint na0    : SKC_TTRK_LO_BITS_NA;
    skc_uint x      : SKC_TTXK_HI_BITS_X;
    skc_uint y      : SKC_TTXK_HI_BITS_Y;
    skc_uint cohort : SKC_TTRK_HI_BITS_COHORT;
  };

  // alternate view of the hi word: x and y fused into a single yx field
  struct {
    skc_uint na1;
    skc_uint yx  : SKC_TTXK_HI_BITS_YX;
    skc_uint na2 : SKC_TTRK_HI_BITS_COHORT;
  };

  // alternate view of the hi word: y and cohort fused into cohort_y
  struct {
    skc_uint na3;
    skc_uint na4      : SKC_TTXK_HI_BITS_X;
    skc_uint cohort_y : SKC_TTRK_HI_BITS_COHORT_Y;
  };
};
| |
| // |
| // |
| // |
| |
//
// drop one reference on the raster builder -- on the final release,
// free the builder, its durable extents, kernels, and the impl itself
//
static
void
skc_raster_builder_pfn_release(struct skc_raster_builder_impl * const impl)
{
  // decrement reference count -- each in-flight cohort grid also holds
  // a reference (see pfn_waiting)
  if (--impl->raster_builder->refcount != 0)
    return;

  //
  // otherwise, dispose of the raster builder and its impl
  //
  struct skc_runtime * const runtime = impl->runtime;

  // free the raster builder
  skc_runtime_host_perm_free(runtime,impl->raster_builder);

  // free durable/perm extents
  skc_extent_phrwg_thr1s_free(runtime,&impl->path_ids);
  skc_extent_phw1g_tdrNs_free(runtime,&impl->transforms);
  skc_extent_phw1g_tdrNs_free(runtime,&impl->clips);
  skc_extent_phw1g_tdrNs_free(runtime,&impl->fill_cmds);
  skc_extent_phrwg_tdrNs_free(runtime,&impl->raster_ids);

  // release kernels
  cl(ReleaseKernel(impl->kernels.fills_expand));
  cl(ReleaseKernel(impl->kernels.rasterize_all));

#if 0
  cl(ReleaseKernel(impl->kernels.rasterize_lines));
  cl(ReleaseKernel(impl->kernels.rasterize_quads));
  cl(ReleaseKernel(impl->kernels.rasterize_cubics));
#endif

  cl(ReleaseKernel(impl->kernels.segment));
  cl(ReleaseKernel(impl->kernels.rasters_alloc));
  cl(ReleaseKernel(impl->kernels.prefix));

  // free the impl
  skc_runtime_host_perm_free(runtime,impl);
}
| |
| // |
| // |
| // |
| |
| static |
| void |
| skc_raster_builder_rasters_release(struct skc_runtime * const runtime, |
| skc_raster_t const * const rasters, |
| skc_uint const size, |
| skc_uint const from, |
| skc_uint const to) |
| { |
| if (from <= to) // no wrap |
| { |
| skc_raster_t const * rasters_from = rasters + from; |
| skc_uint count_from = to - from; |
| |
| skc_grid_deps_unmap(runtime->deps,rasters_from,count_from); |
| skc_runtime_raster_device_release(runtime,rasters_from,count_from); |
| } |
| else // from > to implies wrap |
| { |
| skc_raster_t const * rasters_lo = rasters + from; |
| skc_uint count_lo = size - from; |
| |
| skc_grid_deps_unmap(runtime->deps,rasters_lo,count_lo); |
| skc_runtime_raster_device_release(runtime,rasters_lo,count_lo); |
| |
| skc_grid_deps_unmap(runtime->deps,rasters,to); |
| skc_runtime_raster_device_release(runtime,rasters,to); |
| } |
| } |
| |
| static |
| void |
| skc_raster_builder_paths_release(struct skc_runtime * const runtime, |
| struct skc_extent_phrwg_thr1s_snap * const snap) |
| { |
| // release lo |
| skc_runtime_path_device_release(runtime,snap->hr1.lo,snap->count.lo); |
| |
| // release hi |
| if (snap->count.hi) |
| skc_runtime_path_device_release(runtime,snap->hr1.hi,snap->count.hi); |
| } |
| |
//
// grid dispose pfn -- runs once the cohort is complete and releases
// every resource the cohort still owns, then drops the cohort's
// reference on the raster builder
//
static
void
skc_raster_builder_cohort_grid_pfn_dispose(skc_grid_t const grid)
{
  //
  // ALLOCATED RESOURCES
  //
  // path_ids       -
  // raster_ids     a
  // transforms     -
  // clips          -
  // fill_cmds      -
  // cq             a
  // cohort atomics a
  // cmds           -
  // keys           a
  // meta           a
  //

  struct skc_raster_cohort       * const cohort  = skc_grid_get_data(grid);
  struct skc_raster_builder_impl * const impl    = cohort->impl;
  struct skc_runtime             * const runtime = impl->runtime;

  //
  // release paths -- FIXME -- Note that releasing paths can be
  // performed after rasterization is complete
  //

  // snap alloc the paths -- this host snap simply sets up pointers
  skc_extent_phrwg_thr1s_snap_alloc(runtime,&impl->path_ids,&cohort->path_ids);

  // device-release the path handles captured by the snapshot
  skc_raster_builder_paths_release(runtime,&cohort->path_ids);

  // release path ids
  skc_extent_phrwg_thr1s_snap_free(runtime,&cohort->path_ids);

  //
  // release rasters
  //
  skc_uint const size = cohort->raster_ids.snap->ring->size.pow2; // ring capacity (power of two)
  skc_uint const from = skc_extent_ring_snap_from(cohort->raster_ids.snap);
  skc_uint const to   = skc_extent_ring_snap_to(cohort->raster_ids.snap);

  // unmap and release raster ids -- handles a wrapped [from,to) span
  skc_raster_builder_rasters_release(runtime,impl->raster_ids.hrw,size,from,to);

  // release cohort's remaining allocated resources
  skc_extent_phrwg_tdrNs_snap_free(runtime,&cohort->raster_ids);
  skc_runtime_release_cq_in_order(runtime,cohort->cq);
  skc_extent_thr_tdrw_free(runtime,&cohort->atomics);
  skc_extent_tdrw_free(runtime,&cohort->keys);
  skc_extent_tdrw_free(runtime,&cohort->metas);
  // skc_extent_thrw_tdrw_free(runtime,&cohort->keys);
  // skc_extent_thrw_tdrw_free(runtime,&cohort->metas);
  skc_runtime_host_temp_free(runtime,cohort,cohort->id); // frees the cohort struct itself

  // drop the cohort's reference on the raster builder -- may free it
  skc_raster_builder_pfn_release(impl);

  //
  // ALLOCATED RESOURCES
  //
  // path_ids       -
  // raster_ids     -
  // transforms     -
  // clips          -
  // fill_cmds      -
  // cq             -
  // cohort atomics -
  // cmds           -
  // keys           -
  // meta           -
  //
}
| |
| // |
| // |
| // |
| |
//
// final pipeline stage, scheduled after the PREFIX kernel completes --
// marking the grid complete triggers the dispose pfn
//
static
void
skc_raster_cohort_prefix_release(skc_grid_t const grid)
{
  // FIXME -- note that pfn_dispose can be accomplished here

  // release the grid
  skc_grid_complete(grid);
}
| |
| static |
| void |
| skc_raster_cohort_prefix_cb(cl_event event, cl_int status, skc_grid_t const grid) |
| { |
| SKC_CL_CB(status); |
| |
| struct skc_raster_cohort * const cohort = skc_grid_get_data(grid); |
| struct skc_scheduler * const scheduler = cohort->impl->runtime->scheduler; |
| |
| // as quickly as possible, enqueue next stage in pipeline to context command scheduler |
| SKC_SCHEDULER_SCHEDULE(scheduler,skc_raster_cohort_prefix_release,grid); |
| } |
| |
| // |
| // |
| // |
| |
| #if 0 |
| static |
| int cmp64(const void * ptr_a, const void * ptr_b) |
| { |
| skc_ulong const a = *(const skc_ulong *)ptr_a; |
| skc_ulong const b = *(const skc_ulong *)ptr_b; |
| |
| if (a < b) return -1; |
| if (a > b) return +1; |
| else return 0; |
| } |
| #endif |
| |
| // |
| // |
| // |
| |
//
// third pipeline stage -- scheduled after the rasterize atomics have
// been copied back to the host.  Sorts the TTRK keys, segments them by
// tile, allocates/initializes the cohort's rasters, and launches the
// PREFIX kernel whose completion callback drives the final stage.
//
static
void
skc_raster_cohort_sort_prefix(skc_grid_t const grid)
{
  //
  // ALLOCATED RESOURCES
  //
  // path_ids       i
  // raster_ids     i
  // transforms     a
  // clips          a
  // fill_cmds      -
  // cq             a
  // cohort atomics a
  // cmds           a
  // keys           a
  // meta           -
  //

  // use the backpointers
  struct skc_raster_cohort       * const cohort  = skc_grid_get_data(grid);
  struct skc_raster_builder_impl * const impl    = cohort->impl;
  struct skc_runtime             * const runtime = impl->runtime;

  // release transforms -- rasterization has consumed them
  skc_extent_phw1g_tdrNs_snap_free(runtime,&cohort->transforms);

  // release clips
  skc_extent_phw1g_tdrNs_snap_free(runtime,&cohort->clips);

  // release expanded cmds
  skc_extent_tdrw_free(runtime,&cohort->cmds);

  // alloc the snapshot -- could be zero-sized
  skc_extent_phrwg_tdrNs_snap_alloc(runtime,
                                    &impl->raster_ids,
                                    &cohort->raster_ids,
                                    cohort->cq,NULL);

  // will never be zero
  skc_uint const rasters = skc_extent_ring_snap_count(cohort->raster_ids.snap);

  // acquire fixed-size device-side extent
  skc_extent_tdrw_alloc(runtime,
                        &cohort->metas,
                        sizeof(struct skc_raster_cohort_meta));

  // skc_extent_thrw_tdrw_alloc(runtime,
  //                            &cohort->metas,
  //                            sizeof(struct skc_raster_cohort_meta));

  // zero the metas
  skc_extent_tdrw_zero(&cohort->metas,cohort->cq,NULL);

  // get the read-only host copy of the device atomics
  struct skc_raster_cohort_atomic const * const atomics = cohort->atomics.hr;

  //
  // SORT -- only if rasterization produced any keys
  //
  if (atomics->keys > 0)
    {
#ifndef NDEBUG
      fprintf(stderr,"raster cohort sort: %u\n",atomics->keys);
#endif

      //
      // pad the key count to the sorter's required geometry
      //
      uint32_t keys_padded_in, keys_padded_out;

      hs_cl_pad(runtime->hs,atomics->keys,&keys_padded_in,&keys_padded_out);

      hs_cl_sort(runtime->hs,
                 cohort->cq,
                 0,NULL,NULL,
                 cohort->keys.drw,
                 NULL,
                 atomics->keys,
                 keys_padded_in,
                 keys_padded_out,
                 false);

      cl(SetKernelArg(impl->kernels.segment,0,SKC_CL_ARG(cohort->keys.drw)));
      cl(SetKernelArg(impl->kernels.segment,1,SKC_CL_ARG(cohort->metas.drw)));

#ifndef NDEBUG
      fprintf(stderr,"post-sort\n");
#endif

      // find start of each tile
      skc_device_enqueue_kernel(runtime->device,
                                SKC_DEVICE_KERNEL_ID_SEGMENT_TTRK,
                                cohort->cq,
                                impl->kernels.segment,
                                atomics->keys,
                                0,NULL,NULL);

#ifndef NDEBUG
      fprintf(stderr,"post-segment\n");
#endif

      //
      // DELETE ALL THIS WHEN READY -- disabled host-side reference
      // implementation of the sort/segment stage
      //

#if 0
      //
      //
      //
      cl(Finish(cohort->cq));

      // map keys to host
      union skc_ttrk * const keys = skc_extent_thrw_tdrw_map(&cohort->keys,
                                                             cohort->cq,
                                                             NULL);
      // map meta to host
      struct skc_raster_cohort_meta * const metas = skc_extent_thrw_tdrw_map(&cohort->metas,
                                                                             cohort->cq,
                                                                             NULL);
      // block until done
      cl(Finish(cohort->cq));

      // sort keys
      qsort(keys,atomics->keys,sizeof(*keys),cmp64);

      // mask to determine if rk id is a new block
      skc_uint const subblock_mask = runtime->config->block.subblocks - 1;

      //
      // some counters
      //
      union skc_raster_cohort_meta_in meta_in = {
        .blocks = 0,
        .offset = 0,
        .pk     = 0,
        .rk     = 0
      };

      // get first key
      union skc_ttrk curr = keys[0];

      skc_uint ii=0, jj=0;

      // for all TTRK keys
      while (true)
        {
          // increment ttrk count
          meta_in.rk += 1;

          // was this a new block?
          if ((curr.u32v2.lo & subblock_mask) == 0)
            meta_in.blocks += 1;

          // break if we're out of keys
          if (++ii >= atomics->keys)
            break;

          // otherwise, process next key
          union skc_ttrk const next = keys[ii];

          // if new cohort then save curr meta and init next meta
          if (next.cohort != curr.cohort)
            {
              fprintf(stderr,"[ %u, %u, %u, %u ]\n",
                      meta_in.blocks,
                      meta_in.offset,
                      meta_in.pk,
                      meta_in.rk);

              // store back to buffer
              metas->inout[curr.cohort].in = meta_in;

              // update meta_in
              meta_in.blocks = 0;
              meta_in.offset = ii;
              meta_in.pk     = 0;
              meta_in.rk     = 0;
            }
          // otherwise, if same y but new x then increment TTPK count
          else if ((next.y == curr.y) && (next.x != curr.x))
            {
              meta_in.pk += 1;

#if 0
              fprintf(stderr,"%3u : %3u : ( %3u, %3u ) -> ( %3u )\n",
                      jj++,curr.cohort,curr.y,curr.x,next.x);
#endif
            }

#if 0
          fprintf(stderr,"( %3u, %3u )\n",next.y,next.x);
#endif

          curr = next;
        }

      fprintf(stderr,"[ %u, %u, %u, %u ]\n",
              meta_in.blocks,
              meta_in.offset,
              meta_in.pk,
              meta_in.rk);

      // store back to buffer
      metas->inout[curr.cohort].in = meta_in;


      // unmap
      skc_extent_thrw_tdrw_unmap(&cohort->keys,
                                 keys,
                                 cohort->cq,
                                 NULL);

      // unmap
      skc_extent_thrw_tdrw_unmap(&cohort->metas,
                                 metas,
                                 cohort->cq,
                                 NULL);
#endif
    }

#ifndef NDEBUG
  fprintf(stderr,"rasters_alloc: %u\n",rasters);
#endif

  //
  // RASTER ALLOC/INIT
  //
  cl(SetKernelArg(impl->kernels.rasters_alloc,0,SKC_CL_ARG(runtime->block_pool.atomics.drw)));
  cl(SetKernelArg(impl->kernels.rasters_alloc,1,SKC_CL_ARG(runtime->block_pool.ids.drw)));
  cl(SetKernelArg(impl->kernels.rasters_alloc,2,SKC_CL_ARG(runtime->block_pool.size->ring_mask)));
  cl(SetKernelArg(impl->kernels.rasters_alloc,3,SKC_CL_ARG(runtime->handle_pool.map.drw)));
  cl(SetKernelArg(impl->kernels.rasters_alloc,4,SKC_CL_ARG(cohort->metas.drw)));
  cl(SetKernelArg(impl->kernels.rasters_alloc,5,SKC_CL_ARG(cohort->raster_ids.drN)));
  cl(SetKernelArg(impl->kernels.rasters_alloc,6,SKC_CL_ARG(rasters)));

  skc_device_enqueue_kernel(runtime->device,
                            SKC_DEVICE_KERNEL_ID_RASTERS_ALLOC,
                            cohort->cq,
                            impl->kernels.rasters_alloc,
                            rasters,
                            0,NULL,NULL);

#ifndef NDEBUG
  fprintf(stderr,"post-alloc\n");
#endif

  //
  // PREFIX
  //
  cl(SetKernelArg(impl->kernels.prefix,0,SKC_CL_ARG(runtime->block_pool.atomics.drw)));
  cl(SetKernelArg(impl->kernels.prefix,1,SKC_CL_ARG(runtime->block_pool.ids.drw)));
  cl(SetKernelArg(impl->kernels.prefix,2,SKC_CL_ARG(runtime->block_pool.blocks.drw)));
  cl(SetKernelArg(impl->kernels.prefix,3,SKC_CL_ARG(runtime->block_pool.size->ring_mask)));

  cl(SetKernelArg(impl->kernels.prefix,4,SKC_CL_ARG(cohort->keys.drw)));
  cl(SetKernelArg(impl->kernels.prefix,5,SKC_CL_ARG(runtime->handle_pool.map.drw)));

  cl(SetKernelArg(impl->kernels.prefix,6,SKC_CL_ARG(cohort->metas.drw)));
  cl(SetKernelArg(impl->kernels.prefix,7,SKC_CL_ARG(rasters)));

  cl_event complete;

  skc_device_enqueue_kernel(runtime->device,
                            SKC_DEVICE_KERNEL_ID_PREFIX,
                            cohort->cq,
                            impl->kernels.prefix,
                            rasters,
                            0,NULL,
                            &complete);

  // the prefix completion callback schedules the release stage
  cl(SetEventCallback(complete,CL_COMPLETE,skc_raster_cohort_prefix_cb,grid));
  cl(ReleaseEvent(complete));

#ifndef NDEBUG
  fprintf(stderr,"post-prefix\n");
#endif

  // flush command queue
  cl(Flush(cohort->cq));

  //
  // ALLOCATED RESOURCES
  //
  // path_ids       a
  // raster_ids     a
  // transforms     -
  // clips          -
  // fill_cmds      -
  // cq             a
  // cohort atomics a
  // cmds           -
  // keys           a
  // meta           a
  //
}
| |
| static |
| void |
| skc_raster_cohort_rasterize_cb(cl_event event, cl_int status, skc_grid_t const grid) |
| { |
| SKC_CL_CB(status); |
| |
| struct skc_raster_cohort * const cohort = skc_grid_get_data(grid); |
| |
| // as quickly as possible, enqueue next stage in pipeline to context command scheduler |
| SKC_SCHEDULER_SCHEDULE(cohort->impl->runtime->scheduler,skc_raster_cohort_sort_prefix,grid); |
| } |
| |
//
// second pipeline stage -- scheduled after the fills-expand atomics
// have been copied back.  Launches the rasterizer over the expanded
// commands and copies the resulting key count back to the host.
//
static
void
skc_raster_cohort_rasterize(skc_grid_t const grid)
{
  //
  // ALLOCATED RESOURCES
  //
  // path_ids       i
  // raster_ids     i
  // transforms     i
  // clips          i
  // fill_cmds      s
  // cq             a
  // cohort atomics a
  // cmds           a
  // cmds_quad      a
  // cmds_cubic     a
  // keys           -
  // meta           -

  // use the backpointers
  struct skc_raster_cohort       * const cohort  = skc_grid_get_data(grid);
  struct skc_raster_builder_impl * const impl    = cohort->impl;
  struct skc_runtime             * const runtime = impl->runtime;

  //
  // RELEASED RESOURCES
  //
  // cmds snap
  //

  // release the cmds extent and snap since it's only used by the expand stage
  skc_extent_phw1g_tdrNs_snap_free(runtime,&cohort->fill_cmds);

  //
  // NEW ALLOCATED RESOURCES
  //
  // transforms snap
  // clips snap
  // ttrk keys
  //
  skc_extent_phw1g_tdrNs_snap_alloc(runtime,
                                    &impl->transforms,
                                    &cohort->transforms,
                                    cohort->cq,NULL);

  skc_extent_phw1g_tdrNs_snap_alloc(runtime,
                                    &impl->clips,
                                    &cohort->clips,
                                    cohort->cq,NULL);

  // acquire device-side extent -- sized by the configured key capacity
  skc_extent_tdrw_alloc(runtime,
                        &cohort->keys,
                        sizeof(union skc_ttrk) * runtime->config->raster_cohort.rasterize.keys);

  // skc_extent_thrw_tdrw_alloc(runtime,
  //                            &cohort->keys,
  //                            sizeof(union skc_ttrk) * runtime->config->raster_cohort.rasterize.keys);

  //
  // acquire out-of-order command queue
  //
  // and launch up to 3 kernels
  //
  // for each kernel:
  //
  //   set runtime "global" kernel args:
  //
  //   - block pool atomics
  //   - block pool extent
  //
  //   set cohort "local" kernel args:
  //
  //   - atomics
  //   - cmds
  //
  // enqueue barrier
  // enqueue copy back of atomics on the command queue
  // set callback on copy back event
  // release command queue
  //
  struct skc_raster_cohort_atomic const * const atomics = cohort->atomics.hr;

  // only launch the rasterizer if expansion produced any commands
  if (atomics->cmds > 0)
    {
      cl(SetKernelArg(impl->kernels.rasterize_all,0,SKC_CL_ARG(runtime->block_pool.atomics.drw)));
      cl(SetKernelArg(impl->kernels.rasterize_all,1,SKC_CL_ARG(runtime->block_pool.blocks.drw)));
      cl(SetKernelArg(impl->kernels.rasterize_all,2,SKC_CL_ARG(runtime->block_pool.ids.drw)));
      cl(SetKernelArg(impl->kernels.rasterize_all,3,SKC_CL_ARG(runtime->block_pool.size->ring_mask)));

      cl(SetKernelArg(impl->kernels.rasterize_all,4,SKC_CL_ARG(cohort->atomics.drw)));
      cl(SetKernelArg(impl->kernels.rasterize_all,5,SKC_CL_ARG(cohort->keys.drw)));

      cl(SetKernelArg(impl->kernels.rasterize_all,6,SKC_CL_ARG(cohort->transforms.drN)));
      cl(SetKernelArg(impl->kernels.rasterize_all,7,SKC_CL_ARG(cohort->clips.drN)));
      cl(SetKernelArg(impl->kernels.rasterize_all,8,SKC_CL_ARG(cohort->cmds.drw)));
      cl(SetKernelArg(impl->kernels.rasterize_all,9,SKC_CL_ARG(atomics->cmds)));

      skc_device_enqueue_kernel(runtime->device,
                                SKC_DEVICE_KERNEL_ID_RASTERIZE_ALL,
                                cohort->cq,
                                impl->kernels.rasterize_all,
                                atomics->cmds,
                                0,NULL,NULL);
    }

  //
  // copyback number of TTSK keys
  //
  cl_event complete;

  skc_extent_thr_tdrw_read(&cohort->atomics,cohort->cq,&complete);

  // copyback completion schedules the sort/prefix stage
  cl(SetEventCallback(complete,CL_COMPLETE,skc_raster_cohort_rasterize_cb,grid));
  cl(ReleaseEvent(complete));

  // flush command queue
  cl(Flush(cohort->cq));

  //
  // ALLOCATED RESOURCES
  //
  // path_ids       i
  // raster_ids     i
  // transforms     a
  // clips          a
  // fill_cmds      -
  // cq             a
  // cohort atomics a
  // cmds           a
  // keys           a
  // meta           -
}
| |
| static |
| void |
| skc_raster_cohort_fills_expand_cb(cl_event event, cl_int status, skc_grid_t const grid) |
| { |
| SKC_CL_CB(status); |
| |
| struct skc_raster_cohort * const cohort = skc_grid_get_data(grid); |
| |
| // as quickly as possible, enqueue next stage in pipeline to context command scheduler |
| SKC_SCHEDULER_SCHEDULE(cohort->impl->runtime->scheduler,skc_raster_cohort_rasterize,grid); |
| } |
| |
//
// grid execute pfn -- first pipeline stage.  Acquires the cohort's
// command queue and atomics, snapshots the fill cmds ring, launches
// the FILLS_EXPAND kernel, and copies the resulting command counts
// back to the host.
//
static
void
skc_raster_builder_cohort_grid_pfn_execute(skc_grid_t const grid)
{
  //
  // ALLOCATED RESOURCES
  //
  // path_ids       i
  // raster_ids     i
  // transforms     i
  // clips          i
  // fill_cmds      i
  // cq             -
  // cohort atomics -
  // cmds           -
  // keys           -
  // meta           -
  //

  // get the cohort that pfn_waiting attached to the grid
  struct skc_raster_cohort * const cohort = skc_grid_get_data(grid);

  // get impl
  struct skc_raster_builder_impl * const impl    = cohort->impl;
  struct skc_runtime             * const runtime = impl->runtime;

  // acquire in-order cq
  cohort->cq = skc_runtime_acquire_cq_in_order(runtime);

  // alloc the snapshot -- could be zero-sized
  skc_extent_phw1g_tdrNs_snap_alloc(runtime,
                                    &impl->fill_cmds,
                                    &cohort->fill_cmds,
                                    cohort->cq,NULL);

  // flush the cq to get the fill running
  // cl(Flush(cohort->cq));

  // create split atomics
  skc_extent_thr_tdrw_alloc(runtime,&cohort->atomics,sizeof(struct skc_raster_cohort_atomic));

  // zero the atomics
  skc_extent_thr_tdrw_zero(&cohort->atomics,cohort->cq,NULL);

  // get config
  struct skc_config const * const config = runtime->config;

  // acquire device-side extents -- sized by the configured cmd capacity
  skc_extent_tdrw_alloc(runtime,
                        &cohort->cmds,
                        sizeof(union skc_cmd_rasterize) * config->raster_cohort.expand.cmds);

  //
  // FILLS EXPAND
  //
  // need result of cmd counts before launching RASTERIZE grids
  //
  // - OpenCL 1.2: copy atomic counters back to host and launch RASTERIZE grids from host
  // - OpenCL 2.x: have a kernel size and launch RASTERIZE grids from device
  // - or launch a device-wide grid that feeds itself but that's unsatisfying
  //

  // how many commands? could be zero
  skc_uint const work_size = skc_extent_ring_snap_count(cohort->fill_cmds.snap);

  if (work_size > 0)
    {
      cl(SetKernelArg(impl->kernels.fills_expand,0,SKC_CL_ARG(impl->runtime->block_pool.blocks.drw)));
      cl(SetKernelArg(impl->kernels.fills_expand,1,SKC_CL_ARG(cohort->atomics.drw)));
      cl(SetKernelArg(impl->kernels.fills_expand,2,SKC_CL_ARG(runtime->handle_pool.map.drw)));
      cl(SetKernelArg(impl->kernels.fills_expand,3,SKC_CL_ARG(cohort->fill_cmds.drN)));
      cl(SetKernelArg(impl->kernels.fills_expand,4,SKC_CL_ARG(cohort->cmds.drw)));

      skc_device_enqueue_kernel(runtime->device,
                                SKC_DEVICE_KERNEL_ID_FILLS_EXPAND,
                                cohort->cq,
                                impl->kernels.fills_expand,
                                work_size,
                                0,NULL,NULL);
    }

  //
  // copyback number of rasterization commands
  //
  cl_event complete;

  skc_extent_thr_tdrw_read(&cohort->atomics,cohort->cq,&complete);

  // copyback completion schedules the rasterize stage
  cl(SetEventCallback(complete,CL_COMPLETE,skc_raster_cohort_fills_expand_cb,grid));
  cl(ReleaseEvent(complete));

  // flush command queue
  cl(Flush(cohort->cq));

  //
  // ALLOCATED RESOURCES
  //
  // path_ids       i
  // raster_ids     i
  // transforms     i
  // clips          i
  // fill_cmds      s
  // cq             a
  // cohort atomics a
  // cmds           a
  // keys           -
  // meta           -
  //
}
| |
| // |
| // move grid into waiting state |
| // |
| // this entails allocating a cohort from the temporary extent |
| // |
| |
//
// grid waiting pfn -- allocates the ephemeral cohort, takes snapshots
// of all the builder's rings, and repoints the grid's data at the
// cohort (replacing the impl back-pointer)
//
static
void
skc_raster_builder_cohort_grid_pfn_waiting(skc_grid_t const grid)
{
  // get the impl
  struct skc_raster_builder_impl * const impl    = skc_grid_get_data(grid);
  struct skc_runtime             * const runtime = impl->runtime;

  // retain the raster builder -- released again in pfn_dispose
  impl->raster_builder->refcount += 1;

  // allocate the ephemeral/temp cohort
  skc_subbuf_id_t id;

  struct skc_raster_cohort * const cohort =
    skc_runtime_host_temp_alloc(runtime,
                                SKC_MEM_FLAGS_READ_WRITE,
                                sizeof(*cohort),
                                &id,
                                NULL);

  // save the id and backpointer
  cohort->id   = id;
  cohort->impl = impl;

  // set grid data -- replaces impl
  skc_grid_set_data(grid,cohort);

  //
  // ACQUIRE RESOURCES FOR THE COHORT
  //

  struct skc_raster_builder * const raster_builder = impl->raster_builder;

  // immediately take snapshots of all rings -- these are very inexpensive operations
  skc_extent_phrwg_thr1s_snap_init(runtime,&raster_builder->path_ids  .ring,&cohort->path_ids);
  skc_extent_phw1g_tdrNs_snap_init(runtime,&raster_builder->transforms.ring,&cohort->transforms);
  skc_extent_phw1g_tdrNs_snap_init(runtime,&raster_builder->clips     .ring,&cohort->clips);
  skc_extent_phw1g_tdrNs_snap_init(runtime,&raster_builder->fill_cmds .ring,&cohort->fill_cmds);
  skc_extent_phrwg_tdrNs_snap_init(runtime,&raster_builder->raster_ids.ring,&cohort->raster_ids);

  //
  // ALLOCATED RESOURCES
  //
  // path_ids       i
  // raster_ids     i
  // transforms     i
  // clips          i
  // fill_cmds      i
  // cq             -
  // cohort atomics -
  // cmds           -
  // keys           -
  // meta           -
  //
}
| |
| // |
| // |
| // |
| |
//
// lazily create the cohort grid -- wires up the waiting/execute/dispose
// pfns and stores the grid handle in impl->cohort
//
static
void
skc_raster_builder_cohort_create(struct skc_raster_builder_impl * const impl)
{
  // attach a grid
  impl->cohort = SKC_GRID_DEPS_ATTACH(impl->runtime->deps,
                                      &impl->cohort,
                                      impl,
                                      skc_raster_builder_cohort_grid_pfn_waiting,
                                      skc_raster_builder_cohort_grid_pfn_execute,
                                      skc_raster_builder_cohort_grid_pfn_dispose);
}
| |
| // |
| // |
| // |
| |
| static |
| skc_err |
| skc_raster_builder_pfn_add(struct skc_raster_builder_impl * const impl, |
| skc_path_t const * paths, |
| skc_uint count) |
| { |
| // validate and retain the path |
| skc_err err; |
| |
| err = skc_runtime_handle_device_validate_retain(impl->runtime, |
| SKC_TYPED_HANDLE_TYPE_IS_PATH, |
| paths, |
| count); |
| |
| if (err) |
| return err; |
| |
| skc_runtime_handle_device_retain(impl->runtime,paths,count); |
| |
| // make sure there is a grid |
| if (impl->cohort == NULL) { |
| skc_raster_builder_cohort_create(impl); |
| } |
| |
| // declare rasterization grid happens after path |
| while (count-- > 0) |
| skc_grid_happens_after_handle(impl->cohort,SKC_TYPED_HANDLE_TO_HANDLE(*paths++)); |
| |
| return SKC_ERR_SUCCESS; |
| } |
| |
| // |
| // |
| // |
| |
| static |
| void |
| skc_raster_builder_pfn_end(struct skc_raster_builder_impl * const impl, skc_raster_t * const raster) |
| { |
| // |
| // acquire host-managed path raster handle and bump reference count |
| // to 2 handles will be released (reduced to 1) once the rasters are |
| // completely rasterized |
| // |
| *raster = skc_runtime_handle_device_acquire(impl->runtime); |
| |
| // make sure there is a grid |
| if (impl->cohort == NULL) { |
| skc_raster_builder_cohort_create(impl); |
| } |
| |
| // map a handle to a grid |
| skc_grid_map(impl->cohort,*raster); |
| } |
| |
| // |
| // snapshot the ring and lazily start the grid |
| // |
| // FIXME -- might want to revisit this and settle on an even more |
| // opaque implementation. Some options: |
| // |
| // - never let the SKC API expose a forced grid start |
| // - make snapshots kick off a forced grid start |
| // - be lazy all the time everywhere |
| // |
| |
| static |
| void |
| skc_raster_builder_pfn_start(struct skc_raster_builder_impl * const impl) |
| { |
| skc_grid_t const cohort = impl->cohort; |
| |
| if (cohort != NULL) { |
| skc_grid_start(cohort); |
| } |
| } |
| |
| // |
| // NOTE: THIS MIGHT BE REMOVED |
| // |
| |
| static |
| void |
| skc_raster_builder_pfn_force(struct skc_raster_builder_impl * const impl) |
| { |
| skc_grid_t const cohort = impl->cohort; |
| |
| if (cohort != NULL) { |
| skc_grid_force(cohort); |
| } |
| } |
| |
| // |
| // |
| // |
| |
| skc_err |
| skc_raster_builder_cl_12_create(struct skc_context * const context, |
| struct skc_raster_builder * * const raster_builder) |
| { |
| struct skc_runtime * const runtime = context->runtime; |
| |
| // allocate raster builder |
| (*raster_builder) = skc_runtime_host_perm_alloc(runtime,SKC_MEM_FLAGS_READ_WRITE,sizeof(**raster_builder)); |
| |
| // refcount |
| (*raster_builder)->refcount = 1; |
| |
| // state |
| SKC_ASSERT_STATE_INIT((*raster_builder),SKC_RASTER_BUILDER_STATE_READY); |
| |
| // allocate runtime raster builder |
| struct skc_raster_builder_impl * const impl = skc_runtime_host_perm_alloc(runtime,SKC_MEM_FLAGS_READ_WRITE,sizeof(*impl)); |
| |
| // save the impl |
| (*raster_builder)->impl = impl; |
| |
| // intialize impl |
| impl->raster_builder = (*raster_builder); |
| impl->runtime = runtime; |
| impl->cohort = NULL; |
| |
| // get config |
| struct skc_config const * const config = runtime->config; |
| |
| skc_extent_phrwg_thr1s_alloc(runtime,&impl->path_ids ,sizeof(skc_path_t ) * config->raster_cohort.path_ids .elem_count); |
| skc_extent_phw1g_tdrNs_alloc(runtime,&impl->transforms,sizeof(union skc_transform) * config->raster_cohort.transforms.elem_count); |
| skc_extent_phw1g_tdrNs_alloc(runtime,&impl->clips ,sizeof(union skc_path_clip) * config->raster_cohort.clips .elem_count); |
| skc_extent_phw1g_tdrNs_alloc(runtime,&impl->fill_cmds ,sizeof(union skc_cmd_fill ) * config->raster_cohort.fill .elem_count); |
| skc_extent_phrwg_tdrNs_alloc(runtime,&impl->raster_ids,sizeof(skc_raster_t ) * config->raster_cohort.raster_ids.elem_count); |
| |
| // retain the context |
| //skc_context_retain(context); |
| |
| (*raster_builder)->context = context; |
| |
| (*raster_builder)->add = skc_raster_builder_pfn_add; |
| (*raster_builder)->end = skc_raster_builder_pfn_end; |
| (*raster_builder)->start = skc_raster_builder_pfn_start; |
| (*raster_builder)->force = skc_raster_builder_pfn_force; |
| (*raster_builder)->release = skc_raster_builder_pfn_release; |
| |
| // initialize raster builder with host-writable buffers |
| (*raster_builder)->path_ids .extent = impl->path_ids.hrw; |
| (*raster_builder)->transforms.extent = impl->transforms.hw1; |
| (*raster_builder)->clips .extent = impl->clips.hw1; |
| (*raster_builder)->fill_cmds .extent = impl->fill_cmds.hw1; |
| (*raster_builder)->raster_ids.extent = impl->raster_ids.hrw; |
| |
| // |
| // the rings perform bookkeeping on the extents |
| // |
| // the ring snapshotting and checkpointing are necessary because |
| // another part of the API can _force_ the raster cohort to flush |
| // its work-in-progress commands but only up to a checkpointed |
| // boundary |
| // |
| skc_extent_ring_init(&(*raster_builder)->path_ids.ring, |
| config->raster_cohort.path_ids.elem_count, |
| config->raster_cohort.path_ids.snap_count, |
| sizeof(skc_path_t)); |
| |
| skc_extent_ring_init(&(*raster_builder)->transforms.ring, |
| config->raster_cohort.transforms.elem_count, |
| config->raster_cohort.transforms.snap_count, |
| sizeof(union skc_transform)); |
| |
| skc_extent_ring_init(&(*raster_builder)->clips.ring, |
| config->raster_cohort.clips.elem_count, |
| config->raster_cohort.clips.snap_count, |
| sizeof(union skc_path_clip)); |
| |
| skc_extent_ring_init(&(*raster_builder)->fill_cmds.ring, |
| config->raster_cohort.fill.elem_count, |
| config->raster_cohort.fill.snap_count, |
| sizeof(union skc_cmd_fill)); |
| |
| skc_extent_ring_init(&(*raster_builder)->raster_ids.ring, |
| config->raster_cohort.raster_ids.elem_count, |
| config->raster_cohort.raster_ids.snap_count, |
| sizeof(skc_raster_t)); |
| |
| // |
| // acquire kernels |
| // |
| impl->kernels.fills_expand = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_FILLS_EXPAND); |
| impl->kernels.rasterize_all = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_RASTERIZE_ALL); |
| |
| #if 0 |
| impl->kernels.rasterize_lines = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_RASTERIZE_LINES); |
| impl->kernels.rasterize_quads = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_RASTERIZE_QUADS); |
| impl->kernels.rasterize_cubics = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_RASTERIZE_CUBICS); |
| #endif |
| |
| impl->kernels.segment = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_SEGMENT_TTRK); |
| impl->kernels.rasters_alloc = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_RASTERS_ALLOC); |
| impl->kernels.prefix = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_PREFIX); |
| |
| return SKC_ERR_SUCCESS; |
| } |
| |
| // |
| // |
| // |