| /* |
| * Copyright 2017 Google Inc. |
| * |
| * Use of this source code is governed by a BSD-style license that can |
| * be found in the LICENSE file. |
| * |
| */ |
| |
| // |
| // |
| // |
| |
| #include "kernel_cl_12.h" |
| |
| // |
| // BEST TO RUN THESE ON AN OUT-OF-ORDER CQ |
| // |
| |
| __kernel |
| SKC_BP_INIT_IDS_KERNEL_ATTRIBS |
| void |
| skc_kernel_block_pool_init_ids(__global uint * const ids, uint const bp_size) |
| { |
| uint const gid = get_global_id(0); |
| |
| // |
| // FIXME -- TUNE FOR ARCH -- evaluate if it's much faster to |
| // accomplish this with fewer threads and using either IPC and/or |
| // vector stores -- it should be on certain architectures! |
| // |
| |
| // |
| // initialize pool with sequence |
| // |
| if (gid < bp_size) |
| ids[gid] = gid * SKC_DEVICE_SUBBLOCKS_PER_BLOCK; |
| } |
| |
| // |
| // |
| // |
| |
| __kernel |
| SKC_BP_INIT_ATOMICS_KERNEL_ATTRIBS |
| void |
| skc_kernel_block_pool_init_atomics(__global uint * const bp_atomics, uint const bp_size) |
| { |
| // the version test is to squelch a bug with the Intel OpenCL CPU |
| // compiler declaring it supports the cl_intel_subgroups extension |
| #if defined(cl_intel_subgroups) || defined (cl_khr_subgroups) |
| uint const tid = get_sub_group_local_id(); |
| #else |
| uint const tid = get_local_id(0); |
| #endif |
| |
| // |
| // launch two threads and store [ 0, bp_size ] |
| // |
| bp_atomics[tid] = tid * bp_size; |
| } |
| |
| // |
| // |
| // |