| /* |
| * Copyright 2017 Google Inc. |
| * |
| * Use of this source code is governed by a BSD-style license that can |
| * be found in the LICENSE file. |
| * |
| */ |
| |
| // |
| // |
| // |
| |
| #include <stddef.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <float.h> |
| #include <stdio.h> |
| |
| #include "common/cl/assert_cl.h" |
| |
| #include "context.h" |
| #include "handle.h" |
| #include "grid.h" |
| #include "path.h" |
| #include "path_builder.h" |
| |
| #include "config_cl.h" |
| #include "export_cl_12.h" |
| #include "runtime_cl_12.h" |
| #include "path_builder_cl_12.h" |
| |
| // |
| // OpenCL 1.2 devices support mapping of buffers into the host address |
| // space. |
| // |
// Mapped buffers must be aligned on a MIN_DATA_TYPE_ALIGN_SIZE byte
// boundary (e.g. 128 bytes). This complicates coordinating sharing
| // of data between the host and the device. |
| // |
| // Some OpenCL 2.0 devices support fine-grained shared virtual memory |
| // pointers with byte-addressing and allow simpler coordination |
| // strategies at the cost of maintaining cache coherency. |
| // |
| // The path builder is focused on moving bulk path data from the host |
| // into the device-managed "block" memory pool and arranging it into a |
| // SIMT/SIMD-friendly data structure that can be efficiently read by |
| // the rasterizer. |
| // |
// Note that one simplifying assumption is that a *single* path can
// never be larger than what fits in the single extent (which is split
// into M subbuffers).  Such a path would be extremely long, so this is
// a legitimate size limitation.
| // |
| // For some systems, it may be appropriate to never pull path data |
| // into the device-managed block pool and instead present the path |
| // data to the device in a temporarily available allocated memory |
| // "zone" of paths that can be discarded all at once. |
| // |
| // For other systems, it may be appropriate to simply copy the path |
| // data from host to device. |
| // |
| // But the majority of OpenCL (and VK, MTL, DX12) devices we'll be |
| // targeting support basic map/unmap functionality similar to OpenCL |
| // 1.2. Furthermore, not all OpenCL 2.0 devices support fine-grained |
| // sharing of memory and still require a map/unmap step... but note |
| // that they all support byte-aligned mapping and subbuffers. |
| // |
| // The general strategy that this particular CL_12 implementation uses |
| // is to allocate a large mappable bulk-data path buffer and an |
// auxiliary mappable command buffer.
| // |
| // The buffers are split into a reasonable number of properly aligned |
| // subbuffers to enable simultaneous host and device access. |
| // |
| |
| // |
| // Blocks: |
| // 1 extent |
| // M mapped subbuffers (configurable) to allow for concurrency |
| // |
| // Commands: |
| // 1 extent |
| // M mapped subbuffers (configurable) to allow for concurrency |
| // |
| // Spans: |
| // M hi/lo structures |
| // |
| // { cl_sub, void*, event, base } |
| // |
| // - size of sub buffer |
| // - remaining |
| // |
| // - counts |
| // |
| |
| // |
| // For any kernel launch, at most one path will be discontiguous and |
| // defined across two sub-buffers. |
| // |
| // Nodes are updated locally until full and then stored so they will |
| // never be incomplete. Headers are stored locally until the path is |
| // ended so they will never be incomplete. |
| // |
| // A line, quad or cubic acquires 4/6/8 segments which may be spread |
// across one or more contiguous blocks.
| // |
| // If a flush() occurs then the remaining columns of multi-segment |
| // paths are initialized with zero-length line, quad, cubic elements. |
| // |
| // Every block's command word has a type and a count acquired from a |
| // rolling counter. |
| // |
| // The kernel is passed two spans of blocks { base, count } to |
// process.  The grid must process (lo.count + hi.count) blocks.
| // |
| |
| struct skc_subbuffer_blocks |
| { |
| cl_mem device; |
| void * host; |
| }; |
| |
| struct skc_subbuffer_cmds |
| { |
| cl_mem device; |
| void * host; |
| cl_event map; |
| }; |
| |
| // |
| // ringdex is an index with range [0, blocks-per-subbuf * subbufs-per-buffer ) |
| // |
| |
| typedef skc_uint skc_ringdex_t; |
| |
| union skc_ringdex_expand |
| { |
| div_t qr; |
| |
| struct { |
#ifndef SKC_DIV_REM_BEFORE_QUOT // defined when offsetof(div_t,quot) != 0
| skc_uint subbuf; |
| skc_uint block; |
| #else |
| skc_uint block; |
| skc_uint subbuf; |
| #endif |
| }; |
| }; |
| |
| // |
// this record is attached to the grid and processed when the grid is disposed
| // |
| |
| struct skc_release_record |
| { |
| struct skc_path_builder_impl * impl; // back pointer to impl |
| |
| skc_grid_t grid; // pointer to scheduled grid |
| |
| skc_uint from; // inclusive starting index : [from,to) |
| skc_uint to; // non-inclusive ending index : [from,to) |
| }; |
| |
| // |
| // |
| // |
| |
| struct skc_path_builder_impl |
| { |
| struct skc_path_builder * path_builder; |
| |
| struct skc_runtime * runtime; |
| |
| cl_command_queue cq; |
| |
| struct { |
| cl_kernel alloc; |
| cl_kernel copy; |
| } kernels; |
| |
| // |
// FIXME -- make this a pointer to constant config
| // |
| // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv |
| struct { |
| skc_uint subbufs; // how many subbufs in the buffer? |
| |
| struct { |
| skc_uint buffer; // how many blocks in the buffer? |
| skc_uint subbuf; // how many blocks in a subbuf? |
| } blocks_per; |
| } ring; |
| // |
| // ^^^^^^^^^^^ don't duplicate these constants ^^^^^^^^^^^^^^^^^^ |
| // |
| |
| struct { |
| cl_mem buffer; // backing buffer for blocks |
| struct skc_subbuffer_blocks * subbufs; // array of structures |
| } blocks; |
| |
| struct { |
| cl_mem buffer; // backing buffer for commands |
| struct skc_subbuffer_cmds * subbufs; // array of structures |
| } cmds; |
| |
| struct { |
| struct skc_release_record * records; // max release records is equal to max subbufs |
| skc_path_t * paths; // max paths is less than or equal to max commands |
| } release; |
| |
| cl_mem reads; // each kernel only requires one word to store the block pool "base" |
| |
| struct { |
| skc_uint rolling; // rolling counter used by cmds to map to block pool alloc |
| skc_ringdex_t from; |
| skc_ringdex_t to; |
| } prev; |
| |
| struct { |
| skc_ringdex_t from; |
| skc_ringdex_t to; |
| } curr; |
| |
| struct { |
| struct skc_path_head * head; // pointer to local path header -- not written until path end |
| struct skc_path_node * node; // pointer to local node -- may alias head until head is full |
| |
| struct { |
| skc_uint rolling; // rolling counter of wip node -- valid after one node is allocated |
| union skc_tagged_block_id * next; // next slot in node -- may initially point to head.ids |
| skc_uint rem; // how many id slots left in node block |
| } ids; |
| |
| struct { |
| skc_uint rem; // how many subblocks left in block? |
| skc_uint rolling; // rolling counter of block of subblocks |
| float * next; // next subblock in current subblock block |
| skc_uint idx; // index of next subblock |
| } subblocks; |
| |
| struct { |
| skc_uint one; // .block = 1 |
| skc_uint next; // rolling counter used by cmds to map to block pool alloc |
| } rolling; |
| |
| skc_ringdex_t to; // ringdex of _next_available_ command/block in ring -- FIXME -- should be current |
| } wip; |
| }; |
| |
| // |
| // FIXME -- move to a pow2 subbuffer size and dispense with division |
| // and modulo operations |
| // |
| |
| static |
| union skc_ringdex_expand |
| skc_ringdex_expand(struct skc_path_builder_impl * const impl, |
| skc_ringdex_t const ringdex) |
| { |
| return (union skc_ringdex_expand){ |
| .qr = div(ringdex,impl->ring.blocks_per.subbuf) |
| }; |
| } |
| |
| static |
| void |
| skc_ringdex_wip_to_block_inc(struct skc_path_builder_impl * const impl) |
| { |
| // |
| // FIXME - which is faster? |
| // |
| #if 1 |
| impl->wip.to = (impl->wip.to + 1) % impl->ring.blocks_per.buffer; |
| #else |
  impl->wip.to -= (impl->wip.to + 1 < impl->ring.blocks_per.buffer) ? -1 : impl->wip.to;
| #endif |
| |
| // this path is too long -- for now assert() and die |
| assert(impl->wip.to != impl->curr.from); |
| } |
| |
| static |
| skc_ringdex_t |
| skc_ringdex_span(struct skc_path_builder_impl * const impl, |
| skc_ringdex_t const from, |
| skc_ringdex_t const to) |
| { |
| return (to - from) % impl->ring.blocks_per.buffer; |
| } |
| |
| static |
| void |
| skc_ringdex_wip_to_subbuf_inc(struct skc_path_builder_impl * const impl) |
| { |
| union skc_ringdex_expand const to = skc_ringdex_expand(impl,impl->wip.to); |
| |
| // nothing to do if this is the first block in the subbuf |
| if (to.block == 0) |
| return; |
| |
| skc_uint const new_subbuf = (to.subbuf + 1) % impl->ring.subbufs; |
| |
| // otherwise increment and mod |
| impl->wip.to = new_subbuf * impl->ring.blocks_per.subbuf; |
| } |
| |
| static |
| skc_bool |
| skc_ringdex_curr_is_equal(struct skc_path_builder_impl * const impl) |
| { |
| return impl->curr.from == impl->curr.to; |
| } |
| |
| static |
| skc_bool |
| skc_ringdex_prev_is_equal(struct skc_path_builder_impl * const impl) |
| { |
| return impl->prev.from == impl->prev.to; |
| } |
| |
| static |
| skc_uint |
| skc_ringdex_dont_map_last(struct skc_path_builder_impl * const impl, |
| skc_uint const to_block) |
| { |
| // no blocks acquired OR this is last block in subbuf |
| return !((impl->wip.to == impl->curr.to) || (to_block == 0)); |
| } |
| |
| // |
| // |
| // |
| |
| static |
| struct skc_release_record * |
| skc_release_curr(struct skc_path_builder_impl * const impl) |
| { |
| union skc_ringdex_expand curr_from = skc_ringdex_expand(impl,impl->curr.from); |
| |
| return impl->release.records + curr_from.subbuf; |
| } |
| |
| // |
// FIXME -- get rid of all distant config references -- grab them all at creation time
| // |
| |
| static |
| void |
| skc_path_builder_pfn_begin(struct skc_path_builder_impl * const impl) |
| { |
| // init header counters // { handle, blocks, nodes, prims } |
| impl->wip.head->header = (union skc_path_header){ |
| .handle = 0, |
| .blocks = 0, |
| .nodes = 0, |
| .prims = 0 |
| }; |
| |
| // FIXME -- BOUNDS SHOULD USE SIMD4 TRICK AND NEGATE ONE OF THE CORNERS |
| impl->wip.head->bounds = (union skc_path_bounds){ +FLT_MIN, +FLT_MIN, -FLT_MIN, -FLT_MIN }; |
| |
| // point wip ids at local head node |
| impl->wip.ids.next = impl->wip.head->tag_ids; // point to local head node |
| impl->wip.ids.rem = impl->runtime->config->block.words - SKC_PATH_HEAD_WORDS; // FIXME -- save this constant somewhere |
| |
| // start with no subblocks |
| impl->wip.subblocks.rem = 0; |
| } |
| |
| // |
| // |
| // |
| |
| static |
| void |
| skc_path_builder_impl_finalize_node(struct skc_path_builder_impl * const impl) |
| { |
| #if 1 |
| // |
| // FIXME -- a Duff's device might be optimal here but would have to |
// be customized per device since nodes could be 16-128+ words
| // |
| while (impl->wip.ids.rem > 0) |
| { |
| impl->wip.ids.rem -= 1; |
| impl->wip.ids.next->u32 = SKC_TAGGED_BLOCK_ID_INVALID; |
| impl->wip.ids.next += 1; |
| } |
| #else |
| memset(&impl->wip.ids.next->u32, |
| SKC_TAGGED_BLOCK_ID_INVALID, // 0xFF |
| sizeof(impl->wip.ids.next->u32) * impl->wip.ids.rem); |
| |
| impl->wip.ids.next += impl->wip.ids.rem; |
| impl->wip.ids.rem = 0; |
| #endif |
| } |
| |
| // |
| // |
| // |
| |
| static |
| void |
| skc_zero_float(skc_float * p, skc_uint rem) |
| { |
| memset(p,0,sizeof(*p)*rem); |
| } |
| |
| static |
| void |
| skc_path_builder_finalize_subblocks(struct skc_path_builder * const path_builder) |
| { |
| // |
| // FIXME -- it might be more performant to zero the remaining |
| // columns in a subblock -- a subblock at a time -- instead of the |
| // same column across all the subblocks |
| // |
| #if 0 |
| while (path_builder->line.rem > 0) |
| { |
| --path_builder->line.rem; |
| |
| *path_builder->line.coords[0]++ = 0.0f; |
| *path_builder->line.coords[1]++ = 0.0f; |
| *path_builder->line.coords[2]++ = 0.0f; |
| *path_builder->line.coords[3]++ = 0.0f; |
| } |
| |
| while (path_builder->quad.rem > 0) |
| { |
| --path_builder->quad.rem; |
| |
      *path_builder->quad.coords[0]++ = 0.0f;
      *path_builder->quad.coords[1]++ = 0.0f;
      *path_builder->quad.coords[2]++ = 0.0f;
      *path_builder->quad.coords[3]++ = 0.0f;
      *path_builder->quad.coords[4]++ = 0.0f;
      *path_builder->quad.coords[5]++ = 0.0f;
| } |
| |
| while (path_builder->cubic.rem > 0) |
| { |
| --path_builder->cubic.rem; |
| |
      *path_builder->cubic.coords[0]++ = 0.0f;
      *path_builder->cubic.coords[1]++ = 0.0f;
      *path_builder->cubic.coords[2]++ = 0.0f;
      *path_builder->cubic.coords[3]++ = 0.0f;
      *path_builder->cubic.coords[4]++ = 0.0f;
      *path_builder->cubic.coords[5]++ = 0.0f;
      *path_builder->cubic.coords[6]++ = 0.0f;
      *path_builder->cubic.coords[7]++ = 0.0f;
| } |
| #else |
| if (path_builder->line.rem > 0) |
| { |
| skc_zero_float(path_builder->line.coords[0],path_builder->line.rem); |
| skc_zero_float(path_builder->line.coords[1],path_builder->line.rem); |
| skc_zero_float(path_builder->line.coords[2],path_builder->line.rem); |
| skc_zero_float(path_builder->line.coords[3],path_builder->line.rem); |
| |
| path_builder->line.rem = 0; |
| } |
| |
| if (path_builder->quad.rem > 0) |
| { |
| skc_zero_float(path_builder->quad.coords[0],path_builder->quad.rem); |
| skc_zero_float(path_builder->quad.coords[1],path_builder->quad.rem); |
| skc_zero_float(path_builder->quad.coords[2],path_builder->quad.rem); |
| skc_zero_float(path_builder->quad.coords[3],path_builder->quad.rem); |
| skc_zero_float(path_builder->quad.coords[4],path_builder->quad.rem); |
| skc_zero_float(path_builder->quad.coords[5],path_builder->quad.rem); |
| |
| path_builder->quad.rem = 0; |
| } |
| |
| if (path_builder->cubic.rem > 0) |
| { |
| skc_zero_float(path_builder->cubic.coords[0],path_builder->cubic.rem); |
| skc_zero_float(path_builder->cubic.coords[1],path_builder->cubic.rem); |
| skc_zero_float(path_builder->cubic.coords[2],path_builder->cubic.rem); |
| skc_zero_float(path_builder->cubic.coords[3],path_builder->cubic.rem); |
| skc_zero_float(path_builder->cubic.coords[4],path_builder->cubic.rem); |
| skc_zero_float(path_builder->cubic.coords[5],path_builder->cubic.rem); |
| skc_zero_float(path_builder->cubic.coords[6],path_builder->cubic.rem); |
| skc_zero_float(path_builder->cubic.coords[7],path_builder->cubic.rem); |
| |
| path_builder->cubic.rem = 0; |
| } |
| #endif |
| } |
| |
| // |
| // |
| // |
| |
| static |
| void |
| skc_path_builder_impl_unmap(struct skc_path_builder_impl * const impl, |
| skc_uint from, |
| skc_uint to) |
| { |
| // to might be out of range |
| to = to % impl->ring.subbufs; |
| |
| #if 0 |
| fprintf(stderr,"unmap: [%2u,%2u)\n",from,to); |
| #endif |
| |
  while (from != to) // 'from' might be out of range
| { |
| // bring 'from' back in range |
| from = from % impl->ring.subbufs; |
| |
| struct skc_subbuffer_blocks * const blocks = impl->blocks.subbufs + from; |
| struct skc_subbuffer_cmds * const cmds = impl->cmds .subbufs + from; |
| |
| cl(EnqueueUnmapMemObject(impl->cq, |
| blocks->device, |
| blocks->host, |
| 0,NULL,NULL)); |
| |
| cl(EnqueueUnmapMemObject(impl->cq, |
| cmds->device, |
| cmds->host, |
| 0,NULL,NULL)); |
| |
| // bring from back in range |
| from = (from + 1) % impl->ring.subbufs; |
| } |
| } |
| |
| // |
| // FIXME -- reuse this in create() |
| // |
| |
| static |
| void |
| skc_path_builder_impl_map(struct skc_path_builder_impl * const impl, |
| skc_uint from, |
| skc_uint to) |
| { |
| // to might be out of range |
| to = to % impl->ring.subbufs; |
| |
| #if 0 |
| fprintf(stderr," map: [%2u,%2u)\n",from,to); |
| #endif |
| |
| while (from != to) |
| { |
| cl_int cl_err; |
| |
| struct skc_subbuffer_blocks * const blocks = impl->blocks.subbufs + from; |
| struct skc_subbuffer_cmds * const cmds = impl->cmds .subbufs + from; |
| |
| blocks->host = clEnqueueMapBuffer(impl->cq, |
| blocks->device, |
| CL_FALSE, |
| CL_MAP_WRITE_INVALIDATE_REGION, |
| 0,impl->runtime->config->paths_copy.block.subbuf, |
| 0,NULL,NULL, |
| &cl_err); cl_ok(cl_err); |
| |
| cl(ReleaseEvent(cmds->map)); |
| |
| cmds->host = clEnqueueMapBuffer(impl->cq, |
| cmds->device, |
| CL_FALSE, |
| CL_MAP_WRITE_INVALIDATE_REGION, |
| 0,impl->runtime->config->paths_copy.command.subbuf, |
| 0,NULL,&cmds->map, |
| &cl_err); cl_ok(cl_err); |
| |
| // bring from back in range |
| from = (from + 1) % impl->ring.subbufs; |
| } |
| // |
| // FIXME -- when we switch to out of order queues we'll need a barrier here |
| // |
| } |
| |
| // |
| // |
| // |
| |
| static |
| void |
| skc_path_builder_release_dispose(struct skc_release_record * const release, |
| struct skc_path_builder_impl * const impl) |
| { |
| struct skc_runtime * runtime = impl->runtime; |
| |
| if (release->from <= release->to) // no wrap |
| { |
| skc_path_t const * paths = impl->release.paths + release->from; |
| skc_uint count = release->to - release->from; |
| |
| skc_grid_deps_unmap(runtime->deps,paths,count); |
| skc_runtime_path_device_release(runtime,paths,count); |
| } |
| else // from > to implies wrap |
| { |
| skc_path_t const * paths_lo = impl->release.paths + release->from; |
| skc_uint count_lo = impl->ring.blocks_per.buffer - release->from; |
| |
| skc_grid_deps_unmap(runtime->deps,paths_lo,count_lo); |
| skc_runtime_path_device_release(runtime,paths_lo,count_lo); |
| |
| skc_grid_deps_unmap(runtime->deps,impl->release.paths,release->to); |
| skc_runtime_path_device_release(runtime,impl->release.paths,release->to); |
| } |
| |
| release->to = release->from; |
| } |
| |
| static |
| void |
| skc_path_builder_grid_pfn_dispose(skc_grid_t const grid) |
| { |
| struct skc_release_record * const release = skc_grid_get_data(grid); |
| struct skc_path_builder_impl * const impl = release->impl; |
| |
| skc_path_builder_release_dispose(release,impl); |
| } |
| |
| static |
| void |
| // skc_path_builder_complete(struct skc_release_record * const release) |
| skc_path_builder_complete(skc_grid_t grid) |
| { |
| // |
| // notify deps that this grid is complete enough for other grids to |
| // proceed |
| // |
| // the path builder still has some cleanup to do before all its |
| // resources can be reused |
| // |
| skc_grid_complete(grid); |
| } |
| |
| static |
| void |
| skc_path_builder_paths_copy_cb(cl_event event, cl_int status, skc_grid_t grid) |
| { |
| SKC_CL_CB(status); |
| |
| struct skc_release_record * const release = skc_grid_get_data(grid); |
| |
| SKC_SCHEDULER_SCHEDULE(release->impl->runtime->scheduler,skc_path_builder_complete,grid); |
| } |
| |
| // |
| // |
| // |
| |
| static |
| void |
| skc_path_builder_grid_pfn_waiting(skc_grid_t const grid) |
| { |
| struct skc_release_record * const release = skc_grid_get_data(grid); |
| struct skc_path_builder_impl * const impl = release->impl; |
| |
| // 1. flush incomplete subblocks of path elements |
| // 2. unmap subbuffer on cq.unmap |
| // 3. flush cq.unmap |
| // 4. launch kernel on cq.kernel but wait for unmap completion |
| // 5. flush cq.kernel |
| // 6. remap relevant subbuffers on cq.map but wait for kernel completion |
| // 7. flush cq.map |
| |
| // |
| // FIXME -- can be smarter about flushing if the wip paths are not |
| // in the same subbuf as curr.to |
| // |
| // THIS IS IMPORTANT TO FIX |
| // |
| |
| // flush incomplete subblocks |
| skc_path_builder_finalize_subblocks(impl->path_builder); |
| |
| // |
| // get range of subbufs that need to be unmapped |
| // |
| // note that impl->prev subbufs have already been unmapped |
| // |
| union skc_ringdex_expand curr_from = skc_ringdex_expand(impl,impl->curr.from); |
| union skc_ringdex_expand curr_to = skc_ringdex_expand(impl,impl->curr.to); |
| skc_uint const is_partial = curr_to.block > 0; |
| skc_uint const unmap_to = curr_to.subbuf + is_partial; |
| |
| // |
| // unmap all subbufs in range [from,to) |
| // |
| skc_path_builder_impl_unmap(impl,curr_from.subbuf,unmap_to); |
| |
| // |
| // launch kernels |
| // |
| skc_uint const pb_prev_span = skc_ringdex_span(impl,impl->prev.from,impl->prev.to); |
| skc_uint const pb_curr_span = skc_ringdex_span(impl,impl->curr.from,impl->curr.to); |
| skc_uint const pb_cmds = pb_prev_span + pb_curr_span; |
| |
| // |
| // 1) allocate blocks from pool |
| // |
| |
| // |
| // FIXME -- pack integers into struct/vector |
| // |
| cl(SetKernelArg(impl->kernels.alloc,0,SKC_CL_ARG(impl->runtime->block_pool.atomics.drw))); |
| cl(SetKernelArg(impl->kernels.alloc,1,SKC_CL_ARG(impl->reads))); |
| cl(SetKernelArg(impl->kernels.alloc,2,SKC_CL_ARG(curr_from.subbuf))); |
| cl(SetKernelArg(impl->kernels.alloc,3,SKC_CL_ARG(pb_cmds))); |
| |
| skc_device_enqueue_kernel(impl->runtime->device, |
| SKC_DEVICE_KERNEL_ID_PATHS_ALLOC, |
| impl->cq, |
| impl->kernels.alloc, |
| 1, |
| 0,NULL,NULL); |
| |
| // |
| // 2) copy blocks from unmapped device-accessible memory |
| // |
| |
| // |
| // FIXME -- pack integers into struct/vector and reduce 13 arguments down to 7 |
| // |
| cl(SetKernelArg(impl->kernels.copy, 0,SKC_CL_ARG(impl->runtime->handle_pool.map.drw))); |
| |
| cl(SetKernelArg(impl->kernels.copy, 1,SKC_CL_ARG(impl->runtime->block_pool.ids.drw))); |
| cl(SetKernelArg(impl->kernels.copy, 2,SKC_CL_ARG(impl->runtime->block_pool.blocks.drw))); |
| cl(SetKernelArg(impl->kernels.copy, 3,SKC_CL_ARG(impl->runtime->block_pool.size->ring_mask))); |
| |
| cl(SetKernelArg(impl->kernels.copy, 4,SKC_CL_ARG(impl->reads))); |
| cl(SetKernelArg(impl->kernels.copy, 5,SKC_CL_ARG(curr_from.subbuf))); |
| |
| cl(SetKernelArg(impl->kernels.copy, 6,SKC_CL_ARG(impl->cmds.buffer))); |
| cl(SetKernelArg(impl->kernels.copy, 7,SKC_CL_ARG(impl->blocks.buffer))); |
| |
| cl(SetKernelArg(impl->kernels.copy, 8,SKC_CL_ARG(impl->ring.blocks_per.buffer))); |
| cl(SetKernelArg(impl->kernels.copy, 9,SKC_CL_ARG(impl->prev.rolling))); |
| |
| cl(SetKernelArg(impl->kernels.copy,10,SKC_CL_ARG(impl->prev.from))); |
| cl(SetKernelArg(impl->kernels.copy,11,SKC_CL_ARG(pb_prev_span))); |
| cl(SetKernelArg(impl->kernels.copy,12,SKC_CL_ARG(impl->curr.from))); |
| |
| cl_event complete; |
| |
| skc_device_enqueue_kernel(impl->runtime->device, |
| SKC_DEVICE_KERNEL_ID_PATHS_COPY, |
| impl->cq, |
| impl->kernels.copy, |
| pb_cmds, |
| 0,NULL,&complete); |
| |
| // set a callback on completion |
| cl(SetEventCallback(complete,CL_COMPLETE, |
| skc_path_builder_paths_copy_cb, |
| grid)); |
| |
| // immediately release |
| cl(ReleaseEvent(complete)); |
| |
| // |
| // remap as many subbuffers as possible after the kernel completes |
| // |
| // note that remaps are async and enqueued on the same command queue |
| // as the kernel launch |
| // |
| // we can't remap subbuffers that are in the possibly empty range |
| // |
| // cases: |
| // |
| // - curr.to == wip.to which means no blocks have been acquired |
| // - curr.to points to first block in (next) subbuf |
| // - otherwise, wip acquired blocks in the curr.to subbuf |
| // |
| // check for these first 2 cases! |
| // |
| union skc_ringdex_expand const prev_from = skc_ringdex_expand(impl,impl->prev.from); |
| skc_uint const no_wip = impl->curr.to == impl->wip.to; |
| skc_uint map_to = curr_to.subbuf + (is_partial && no_wip); |
| |
| // remap all subbufs in range [from,to) |
| skc_path_builder_impl_map(impl,prev_from.subbuf,map_to); |
| |
| // flush command queue |
| cl(Flush(impl->cq)); |
| |
| // save rolling |
| impl->prev.rolling = impl->wip.rolling.next; |
| |
| // update prev and curr |
| if (no_wip) |
| { |
| // |
| // if there was no wip then round up to the next subbuf |
| // |
| skc_ringdex_wip_to_subbuf_inc(impl); |
| |
| // |
      // update prev/curr with incremented wip
| // |
| impl->prev.from = impl->prev.to = impl->wip.to; |
| impl->curr.from = impl->curr.to = impl->wip.to; |
| } |
| else |
| { |
| // |
| // update prev with wip partials |
| // |
| impl->prev.from = impl->curr.to; |
| impl->prev.to = impl->wip .to; |
| |
| // |
| // start curr on a new subbuf boundary |
| // |
| skc_ringdex_wip_to_subbuf_inc(impl); |
| |
| impl->curr.from = impl->wip.to; |
| impl->curr.to = impl->wip.to; |
| } |
| } |
| |
| // |
| // |
| // |
| |
| static |
| void |
| skc_path_builder_impl_acquire_subbuffer(struct skc_path_builder_impl * const impl, |
| skc_uint const subbuf) |
| { |
| // |
| // FIXME -- move to a power-of-two subbuf size and kickstart path |
| // copies as early as possible |
| // |
| // FIXME -- the subbufs "self-clock" (flow control) the kernel |
| // launches and accounting. Combine all the subbuffers and release |
| // records into a single indexable struct instead of 3. |
| // |
| struct skc_subbuffer_cmds * const sc = impl->cmds.subbufs + subbuf; |
| struct skc_release_record * const release = impl->release.records + subbuf; |
| struct skc_scheduler * const scheduler = impl->runtime->scheduler; |
| |
| // can't proceed until the paths have been released |
| SKC_SCHEDULER_WAIT_WHILE(scheduler,release->from != release->to); |
| |
| // throw in a scheduler yield ... FIXME -- get rid of |
| skc_scheduler_yield(scheduler); |
| |
| // can't proceed until the subbuffer is mapped |
| cl(WaitForEvents(1,&sc->map)); |
| } |
| |
| // |
| // |
| // |
| |
| static |
| union skc_ringdex_expand |
| skc_path_builder_impl_acquire_block(struct skc_path_builder_impl * const impl) |
| { |
| // break ringdex into components |
| union skc_ringdex_expand const to = skc_ringdex_expand(impl,impl->wip.to); |
| |
| // does wip ringdex point to a new subbuffer? |
| if (to.block == 0) |
| { |
| // potentially spin/block waiting for subbuffer |
| skc_path_builder_impl_acquire_subbuffer(impl,to.subbuf); |
| } |
| |
| // post increment wip.to |
| skc_ringdex_wip_to_block_inc(impl); |
| |
| return to; |
| } |
| |
| // |
| // |
| // |
| |
| static |
| skc_uint |
| skc_rolling_block(skc_uint const rolling, skc_uint const tag) |
| { |
| return rolling | tag; |
| } |
| |
| static |
| skc_uint |
| skc_rolling_subblock(skc_uint const rolling, skc_uint const subblock, skc_uint const tag) |
| { |
| return rolling | (subblock << SKC_TAGGED_BLOCK_ID_BITS_TAG) | tag; |
| } |
| |
| static |
| void |
| skc_rolling_inc(struct skc_path_builder_impl * const impl) |
| { |
| impl->wip.rolling.next += impl->wip.rolling.one; |
| } |
| |
| // |
| // |
| // |
| |
| static |
| void * |
| skc_path_builder_impl_new_command(struct skc_path_builder_impl * const impl, |
| skc_uint const rolling, |
| skc_cmd_paths_copy_tag const tag) |
| { |
| // bump blocks count |
| impl->wip.head->header.blocks += 1; |
| |
| // acquire a block |
| union skc_ringdex_expand const to = skc_path_builder_impl_acquire_block(impl); |
| |
| // make a pointer |
| union skc_tagged_block_id * const cmds_subbuf = impl->cmds.subbufs[to.subbuf].host; |
| |
| // store command for block |
| cmds_subbuf[to.block].u32 = skc_rolling_block(rolling,tag); |
| |
| #if 0 |
| // store command for block |
| cmds_subbuf[to.block].u32 = skc_rolling_block(impl->wip.rolling.next,tag); |
| |
| // increment rolling |
| skc_rolling_inc(impl); |
| #endif |
| |
| // return pointer to block |
| float * const blocks_subbuf = impl->blocks.subbufs[to.subbuf].host; |
| |
| // FIXME -- make it easier to get config constant |
| return blocks_subbuf + (to.block * impl->runtime->config->block.words); |
| } |
| |
| // |
| // |
| // |
| |
| static |
| void |
| skc_path_builder_impl_flush_node(struct skc_path_builder_impl * const impl) |
| { |
| // store command to subbuf and get pointer to blocks subbuf |
| void * const block = skc_path_builder_impl_new_command(impl,impl->wip.ids.rolling, |
| SKC_CMD_PATHS_COPY_TAG_NODE); |
| |
  // copy node to blocks subbuf -- write-only
| memcpy(block,impl->wip.node,impl->runtime->config->block.bytes); |
| } |
| |
| static |
| void |
| skc_path_builder_impl_flush_head(struct skc_path_builder_impl * const impl) |
| { |
| // store command to subbuf and get pointer to blocks subbuf |
| void * const block = skc_path_builder_impl_new_command(impl,impl->wip.rolling.next, |
| SKC_CMD_PATHS_COPY_TAG_HEAD); |
| |
| // copy head to blocks subbuf -- write-only |
| memcpy(block,impl->wip.head,impl->runtime->config->block.bytes); |
| |
| // increment rolling |
| skc_rolling_inc(impl); |
| |
| // the 'to' index is non-inclusive so assign wip.to after flush_head |
| impl->curr.to = impl->wip.to; |
| } |
| |
| // |
| // |
| // |
| |
| static |
| void |
| skc_path_builder_impl_new_node_block(struct skc_path_builder_impl * const impl) |
| { |
| // update final block id in node |
| impl->wip.ids.next->u32 = skc_rolling_block(impl->wip.rolling.next,SKC_BLOCK_ID_TAG_PATH_NEXT); |
| |
  // if wip.ids is not the header then flush the now-full wip node
| if (impl->wip.head->header.nodes > 0) |
| skc_path_builder_impl_flush_node(impl); |
| |
| // bump node count |
| impl->wip.head->header.nodes += 1; |
| |
| // save current rolling |
| impl->wip.ids.rolling = impl->wip.rolling.next; |
| |
| // increment rolling |
| skc_rolling_inc(impl); |
| |
| // update wip.ids.* |
| impl->wip.ids.next = impl->wip.node->tag_ids; |
| impl->wip.ids.rem = impl->runtime->config->block.words; |
| } |
| |
| static |
| void |
| skc_path_builder_impl_new_segs_block(struct skc_path_builder_impl * const impl) |
| { |
| impl->wip.subblocks.rem = impl->runtime->config->block.subblocks; // FIXME -- move constants closer to structure |
| impl->wip.subblocks.rolling = impl->wip.rolling.next; |
| impl->wip.subblocks.next = skc_path_builder_impl_new_command(impl,impl->wip.rolling.next, |
| SKC_CMD_PATHS_COPY_TAG_SEGS); |
| impl->wip.subblocks.idx = 0; |
| |
| // increment rolling |
| skc_rolling_inc(impl); |
| } |
| |
| // |
| // |
| // |
| |
| static |
| void |
| skc_path_builder_impl_acquire_subblocks(struct skc_path_builder_impl * const impl, |
| skc_block_id_tag tag, |
| skc_uint vertices, |
| float * * subblocks) |
| { |
| // |
| // FIRST TAG RECORDS THE ELEMENT TYPE |
| // |
| while (true) |
| { |
      // if only one block id slot is left in the node then acquire a
      // new node block and link to it from that last slot with a
      // 'next' tag
| if (impl->wip.ids.rem == 1) |
| skc_path_builder_impl_new_node_block(impl); |
| |
| // if zero subblocks left then acquire a new subblock block and |
| // append its block id |
| if (impl->wip.subblocks.rem == 0) |
| skc_path_builder_impl_new_segs_block(impl); |
| |
      // store the tagged subblock id -- tag and subblocks may have been updated above
| impl->wip.ids.next->u32 = skc_rolling_subblock(impl->wip.subblocks.rolling,impl->wip.subblocks.idx,tag); |
| |
| // increment node block subblock pointer |
| impl->wip.ids.next += 1; |
| impl->wip.ids.rem -= 1; |
| |
| // how many vertices can we store |
| skc_uint rem = min(vertices,impl->wip.subblocks.rem); |
| |
| // decrement vertices |
| vertices -= rem; |
| impl->wip.subblocks.rem -= rem; |
| impl->wip.subblocks.idx += rem; |
| |
| // assign subblocks |
| do { |
| *subblocks++ = impl->wip.subblocks.next; |
| impl->wip.subblocks.next += impl->runtime->config->subblock.words; |
| // FIXME -- move constants closer to structure |
| } while (--rem > 0); |
| |
| // anything left to do? |
| if (vertices == 0) |
| break; |
| |
| // any tag after this will be a caboose command |
| tag = SKC_BLOCK_ID_TAG_PATH_NEXT; |
| } |
| } |
| |
| // |
| // |
| // |
| |
| static |
| void |
| skc_path_builder_pfn_end(struct skc_path_builder_impl * const impl, skc_path_t * const path) |
| { |
| // finalize incomplete active subblocks -- we don't care about any |
| // remaining unused subblocks in block |
| skc_path_builder_finalize_subblocks(impl->path_builder); |
| |
  // mark remaining wip.ids in the head or node as invalid
| skc_path_builder_impl_finalize_node(impl); |
| |
  // flush the wip node if it is not actually the head
| if (impl->wip.head->header.nodes >= 1) |
| skc_path_builder_impl_flush_node(impl); |
| |
| // acquire path host id |
| *path = skc_runtime_handle_device_acquire(impl->runtime); // FIXME -- MAY WANT TO GRAB AN ID ON BEGIN |
| |
| // save path host handle |
| impl->wip.head->header.handle = *path; |
| |
| // flush head -- acquires a block and bumps head->header.blocks |
| skc_path_builder_impl_flush_head(impl); |
| |
| // get current release |
| struct skc_release_record * const release = skc_release_curr(impl); |
| |
| // acquire grid if null |
| if (release->grid == NULL) |
| { |
| release->grid = |
| SKC_GRID_DEPS_ATTACH(impl->runtime->deps, |
| &release->grid, // NULL on start/force |
| release, // data payload |
| skc_path_builder_grid_pfn_waiting, |
| NULL, // no execute pfn |
| skc_path_builder_grid_pfn_dispose); |
| } |
| |
| // update grid map |
| skc_grid_map(release->grid,*path); |
| |
| // update path release |
| impl->release.paths[release->to] = *path; |
| |
| // increment release.to |
| release->to = (release->to + 1) % impl->ring.blocks_per.buffer; |
| |
| // add guard bit |
| *path |= SKC_TYPED_HANDLE_TYPE_IS_PATH; |
| |
| #if 1 |
| // |
| // eager kernel launch? |
| // |
| { |
| union skc_ringdex_expand const curr_from = skc_ringdex_expand(impl,impl->curr.from); |
| union skc_ringdex_expand const curr_to = skc_ringdex_expand(impl,impl->curr.to); |
| |
| if (curr_from.subbuf != curr_to.subbuf) |
| { |
| skc_grid_start(release->grid); |
| // skc_scheduler_yield(impl->runtime->scheduler); |
| } |
| } |
| #endif |
| } |
| |
| // |
| // FIXME -- clean up accessing of CONFIG constants in these 3 routines |
| // |
| |
| static |
| void |
| skc_path_builder_pfn_new_line(struct skc_path_builder_impl * const impl) |
| { |
| // acquire subblock pointers |
| skc_path_builder_impl_acquire_subblocks(impl,SKC_BLOCK_ID_TAG_PATH_LINE,4, |
| impl->path_builder->line.coords); |
| |
| // increment line count |
| impl->wip.head->header.prims += 1; |
| |
  // update remaining line coord count
  impl->path_builder->line.rem = impl->runtime->config->subblock.words;
| } |
| |
| static |
| void |
| skc_path_builder_pfn_new_quad(struct skc_path_builder_impl * const impl) |
| { |
| // acquire subblock pointers |
| skc_path_builder_impl_acquire_subblocks(impl,SKC_BLOCK_ID_TAG_PATH_QUAD,6, |
| impl->path_builder->quad.coords); |
| |
  // increment prim count
  impl->wip.head->header.prims += 1;

  // update remaining quad coord count
  impl->path_builder->quad.rem = impl->runtime->config->subblock.words;
| } |
| |
| static |
| void |
| skc_path_builder_pfn_new_cubic(struct skc_path_builder_impl * const impl) |
| { |
| // acquire subblock pointers |
| skc_path_builder_impl_acquire_subblocks(impl,SKC_BLOCK_ID_TAG_PATH_CUBIC,8, |
| impl->path_builder->cubic.coords); |
| |
  // increment prim count
  impl->wip.head->header.prims += 1;

  // update remaining cubic coord count
  impl->path_builder->cubic.rem = impl->runtime->config->subblock.words;
| } |
| |
| // |
| // |
| // |
| |
| static |
| void |
| skc_path_builder_pfn_release(struct skc_path_builder_impl * const impl) |
| { |
| // decrement reference count |
| if (--impl->path_builder->refcount != 0) |
| return; |
| |
| // |
| // otherwise, dispose of everything |
| // |
| struct skc_runtime * const runtime = impl->runtime; |
| |
| // free path builder |
| skc_runtime_host_perm_free(impl->runtime,impl->path_builder); |
| |
| // release cq |
| skc_runtime_release_cq_in_order(runtime,impl->cq); |
| |
| // release kernels |
| cl(ReleaseKernel(impl->kernels.alloc)); |
| cl(ReleaseKernel(impl->kernels.copy)); |
| |
| // free blocks extents |
| cl(ReleaseMemObject(impl->blocks.buffer)); |
| skc_runtime_host_perm_free(runtime,impl->blocks.subbufs); |
| |
| cl(ReleaseMemObject(impl->cmds.buffer)); |
| skc_runtime_host_perm_free(runtime,impl->cmds.subbufs); |
| |
| // free records |
| skc_runtime_host_perm_free(runtime,impl->release.records); |
| skc_runtime_host_perm_free(runtime,impl->release.paths); |
| |
| // release staging head and node |
| skc_runtime_host_perm_free(runtime,impl->wip.head); |
| skc_runtime_host_perm_free(runtime,impl->wip.node); |
| |
| // release reads scratch array |
| cl(ReleaseMemObject(impl->reads)); |
| |
| // for all subbuffers |
| // unmap subbuffer |
| // release subbuffer |
| // printf("%s not releasing subbuffers\n",__func__); |
| |
| skc_runtime_host_perm_free(impl->runtime,impl); |
| } |
| |
| // |
| // |
| // |
| |
| skc_err |
| skc_path_builder_cl_12_create(struct skc_context * const context, |
| struct skc_path_builder * * const path_builder) |
| { |
| // |
| // retain the context |
| // skc_context_retain(context); |
| // |
| struct skc_runtime * const runtime = context->runtime; |
| |
| // allocate path builder |
| (*path_builder) = skc_runtime_host_perm_alloc(runtime,SKC_MEM_FLAGS_READ_WRITE,sizeof(**path_builder)); |
| |
| // init state |
| SKC_ASSERT_STATE_INIT((*path_builder),SKC_PATH_BUILDER_STATE_READY); |
| |
| (*path_builder)->context = context; |
| |
| // save opaque impl-specific pointers |
| (*path_builder)->begin = skc_path_builder_pfn_begin; |
| (*path_builder)->end = skc_path_builder_pfn_end; |
| (*path_builder)->new_line = skc_path_builder_pfn_new_line; |
| (*path_builder)->new_quad = skc_path_builder_pfn_new_quad; |
| (*path_builder)->new_cubic = skc_path_builder_pfn_new_cubic; |
| (*path_builder)->release = skc_path_builder_pfn_release; |
| |
| // initialize path builder counts |
| (*path_builder)->line.rem = 0; |
| (*path_builder)->quad.rem = 0; |
| (*path_builder)->cubic.rem = 0; |
| |
| (*path_builder)->refcount = 1; |
| |
| struct skc_path_builder_impl * const impl = skc_runtime_host_perm_alloc(runtime,SKC_MEM_FLAGS_READ_WRITE,sizeof(*impl)); |
| |
| (*path_builder)->impl = impl; |
| |
| // |
| // init impl |
| // |
| impl->path_builder = *path_builder; |
| impl->runtime = runtime; |
| |
| impl->cq = skc_runtime_acquire_cq_in_order(runtime); |
| |
| impl->kernels.alloc = skc_device_acquire_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_PATHS_ALLOC); |
| impl->kernels.copy = skc_device_acquire_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_PATHS_COPY); |
| |
| // |
| // FIXME -- let these config constants remain constant and in place |
| // |
| struct skc_config const * const config = runtime->config; |
| |
| impl->ring.subbufs = config->paths_copy.buffer.count; |
| impl->ring.blocks_per.buffer = config->paths_copy.subbuf.count * config->paths_copy.buffer.count; |
| impl->ring.blocks_per.subbuf = config->paths_copy.subbuf.count; |
| // |
| // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
| // |
| |
| cl_int cl_err; |
| |
| // allocate large device-side extent for path data |
| impl->blocks.buffer = clCreateBuffer(runtime->cl.context, |
| CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, |
| config->paths_copy.block.buffer, // FIXME -- either use config or local constants everywhere |
| NULL,&cl_err); cl_ok(cl_err); |
| |
| // allocate small host-side array of pointers to mapped subbufs |
| impl->blocks.subbufs = skc_runtime_host_perm_alloc(runtime,SKC_MEM_FLAGS_READ_WRITE, |
| impl->ring.subbufs * |
| sizeof(*impl->blocks.subbufs)); |
| |
| // allocate large device-side extent for path copy commands |
| impl->cmds.buffer = clCreateBuffer(runtime->cl.context, |
| CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, |
| config->paths_copy.command.buffer, |
| NULL,&cl_err); cl_ok(cl_err); |
| |
| // allocate small host-side array of pointers to mapped subbufs |
| impl->cmds.subbufs = skc_runtime_host_perm_alloc(runtime,SKC_MEM_FLAGS_READ_WRITE, |
| impl->ring.subbufs * |
| sizeof(*impl->cmds.subbufs)); |
| |
| // allocate small host-side array of intervals of path handles |
| impl->release.records = skc_runtime_host_perm_alloc(runtime,SKC_MEM_FLAGS_READ_WRITE, |
| impl->ring.subbufs * |
| sizeof(*impl->release.records)); |
| |
| // allocate large host-side array that is max # of path handles in flight |
| impl->release.paths = skc_runtime_host_perm_alloc(runtime,SKC_MEM_FLAGS_READ_WRITE, |
| impl->ring.blocks_per.buffer * |
| sizeof(*impl->release.paths)); |
| |
| // small scratch used by kernels |
| impl->reads = clCreateBuffer(runtime->cl.context, |
| CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, |
| sizeof(skc_uint) * impl->ring.subbufs, |
| NULL,&cl_err); cl_ok(cl_err); |
| |
| // initialize release record with impl backpointer |
| for (skc_uint ii=0; ii<impl->ring.subbufs; ii++) |
| { |
| struct skc_release_record * record = impl->release.records + ii; |
| |
| record->impl = impl; |
| record->grid = NULL; |
| record->from = record->to = ii * impl->ring.blocks_per.subbuf; |
| } |
| |
| // |
| // allocate and map subbuffers -- we always check the command |
| // subbuffer's map/unmap events before touching it or its associated |
| // block subbuffer. |
| // |
| struct skc_subbuffer_blocks * sb = impl->blocks.subbufs; |
| struct skc_subbuffer_cmds * sc = impl->cmds .subbufs; |
| |
| cl_buffer_region rb = { 0, config->paths_copy.block.subbuf }; |
| cl_buffer_region rc = { 0, config->paths_copy.command.subbuf }; |
| |
| // for each subbuffer |
| for (skc_uint ii=0; ii<config->paths_copy.buffer.count; ii++) |
| { |
| sb->device = clCreateSubBuffer(impl->blocks.buffer, |
| CL_MEM_HOST_WRITE_ONLY, |
| CL_BUFFER_CREATE_TYPE_REGION, |
| &rb, |
| &cl_err); cl_ok(cl_err); |
| |
| sb->host = clEnqueueMapBuffer(impl->cq, |
| sb->device, |
| CL_FALSE, |
| CL_MAP_WRITE_INVALIDATE_REGION, |
| 0,rb.size, |
| 0,NULL,NULL, |
| &cl_err); cl_ok(cl_err); |
| |
| sc->device = clCreateSubBuffer(impl->cmds.buffer, |
| CL_MEM_HOST_WRITE_ONLY, |
| CL_BUFFER_CREATE_TYPE_REGION, |
| &rc, |
| &cl_err); cl_ok(cl_err); |
| |
| sc->host = clEnqueueMapBuffer(impl->cq, |
| sc->device, |
| CL_FALSE, |
| CL_MAP_WRITE_INVALIDATE_REGION, |
| 0,rc.size, |
| 0,NULL,&sc->map, |
| &cl_err); cl_ok(cl_err); |
| sb += 1; |
| sc += 1; |
| |
| rb.origin += rb.size; |
| rc.origin += rc.size; |
| } |
| |
| // |
| // initialize remaining members |
| // |
| impl->prev.from = 0; |
| impl->prev.to = 0; |
| impl->prev.rolling = 0; |
| |
| impl->curr.from = 0; |
| impl->curr.to = 0; |
| |
| impl->wip.to = 0; |
| |
| impl->wip.head = skc_runtime_host_perm_alloc(runtime,SKC_MEM_FLAGS_READ_WRITE,config->block.bytes); |
| impl->wip.node = skc_runtime_host_perm_alloc(runtime,SKC_MEM_FLAGS_READ_WRITE,config->block.bytes); |
| |
| impl->wip.rolling.one = SKC_BLOCK_ID_TAG_COUNT * config->block.subblocks; |
| impl->wip.rolling.next = 0; |
| |
| // for now, completely initialize builder before returning |
| cl(Finish(impl->cq)); |
| |
| return SKC_ERR_SUCCESS; |
| } |
| |
| // |
| // |
| // |