blob: 5abbe189393b80ba8f596d52762b048dbdf2c337 [file] [log] [blame]
/*
* Copyright 2017 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can
* be found in the LICENSE file.
*
*/
//
//
//
#include "kernel_cl_12.h"
//
// BEST TO RUN THESE KERNELS ON AN OUT-OF-ORDER COMMAND QUEUE (CQ)
//
__kernel
SKC_BP_INIT_IDS_KERNEL_ATTRIBS
void
skc_kernel_block_pool_init_ids(__global uint * const ids, uint const bp_size)
{
  //
  // Seeds the block pool id array with a strided sequence: each
  // work-item writes one entry,
  //
  //   ids[i] = i * SKC_DEVICE_SUBBLOCKS_PER_BLOCK   for i in [0, bp_size)
  //
  // FIXME -- TUNE FOR ARCH -- evaluate if it's much faster to
  // accomplish this with fewer threads and using either IPC and/or
  // vector stores -- it should be on certain architectures!
  //
  uint const idx = get_global_id(0);

  //
  // work-items at or past the end of the pool have nothing to store
  //
  if (idx >= bp_size)
    return;

  ids[idx] = idx * SKC_DEVICE_SUBBLOCKS_PER_BLOCK;
}
//
//
//
__kernel
SKC_BP_INIT_ATOMICS_KERNEL_ATTRIBS
void
skc_kernel_block_pool_init_atomics(__global uint * const bp_atomics, uint const bp_size)
{
  //
  // Initializes the first two words of bp_atomics to [ 0, bp_size ]:
  // lane 0 stores 0 and lane 1 stores bp_size.
  //
  //   bp_atomics : destination for the two initial values
  //   bp_size    : value stored in the second word
  //
  // The lane index is the sub-group local id when either subgroups
  // extension macro is defined, otherwise the work-group local id.
  //
  // NOTE(review): the previous comment here described a "version
  // test" used to squelch a bug with the Intel OpenCL CPU compiler
  // declaring it supports the cl_intel_subgroups extension -- no
  // version test is present in this condition; confirm whether one
  // is still required.
  //
#if defined(cl_intel_subgroups) || defined (cl_khr_subgroups)
  uint const tid = get_sub_group_local_id();
#else
  uint const tid = get_local_id(0);
#endif

  //
  // launch two threads and store [ 0, bp_size ]
  //
  // The guard keeps any lane beyond the first two from storing
  // outside the two intended words if the kernel is ever launched
  // with a larger work-group / sub-group -- presumably bp_atomics
  // holds exactly two counters; verify against the host allocation.
  //
  if (tid < 2)
    bp_atomics[tid] = tid * bp_size;
}
//
//
//