src/compute/skc/platforms/cl_12/kernels/block_pool_init.cl - skia - Git at Google

 /*
  * Copyright 2017 Google Inc.
  *
  * Use of this source code is governed by a BSD-style license that can
  * be found in the LICENSE file.
  *
  */

 //
 //
 //

 #include "kernel_cl_12.h"

 //
 // BEST TO RUN THESE ON AN OUT-OF-ORDER CQ
 //

 __kernel
 SKC_BP_INIT_IDS_KERNEL_ATTRIBS
 void
 skc_kernel_block_pool_init_ids(__global uint * const ids, uint const bp_size)
 {
   uint const gid = get_global_id(0);

   //
   // FIXME -- TUNE FOR ARCH -- evaluate if it's much faster to
   // accomplish this with fewer threads and using either IPC and/or
   // vector stores -- it should be on certain architectures!
   //

   //
   // initialize pool with sequence
   //
   if (gid < bp_size)
     ids[gid] = gid * SKC_DEVICE_SUBBLOCKS_PER_BLOCK;
 }

 //
 //
 //

 __kernel
 SKC_BP_INIT_ATOMICS_KERNEL_ATTRIBS
 void
 skc_kernel_block_pool_init_atomics(__global uint * const bp_atomics, uint const bp_size)
 {
   // the version test is to squelch a bug with the Intel OpenCL CPU
   // compiler declaring it supports the cl_intel_subgroups extension
 #if defined(cl_intel_subgroups) || defined (cl_khr_subgroups)
   uint const tid = get_sub_group_local_id();
 #else
   uint const tid = get_local_id(0);
 #endif

   //
   // launch two threads and store [ 0, bp_size ]
   //
   bp_atomics[tid] = tid * bp_size;
 }

 //
 //
 //
	/*
	* Copyright 2017 Google Inc.
	*
	* Use of this source code is governed by a BSD-style license that can
	* be found in the LICENSE file.
	*
	*/

	//
	//
	//

	#include "kernel_cl_12.h"

	//
	// BEST TO RUN THESE ON AN OUT-OF-ORDER CQ
	//

	__kernel
	SKC_BP_INIT_IDS_KERNEL_ATTRIBS
	void
	skc_kernel_block_pool_init_ids(__global uint * const ids, uint const bp_size)
	{
	uint const gid = get_global_id(0);

	//
	// FIXME -- TUNE FOR ARCH -- evaluate if it's much faster to
	// accomplish this with fewer threads and using either IPC and/or
	// vector stores -- it should be on certain architectures!
	//

	//
	// initialize pool with sequence
	//
	if (gid < bp_size)
	ids[gid] = gid * SKC_DEVICE_SUBBLOCKS_PER_BLOCK;
	}

	//
	//
	//

	__kernel
	SKC_BP_INIT_ATOMICS_KERNEL_ATTRIBS
	void
	skc_kernel_block_pool_init_atomics(__global uint * const bp_atomics, uint const bp_size)
	{
	// the version test is to squelch a bug with the Intel OpenCL CPU
	// compiler declaring it supports the cl_intel_subgroups extension
	#if defined(cl_intel_subgroups) \|\| defined (cl_khr_subgroups)
	uint const tid = get_sub_group_local_id();
	#else
	uint const tid = get_local_id(0);
	#endif

	//
	// launch two threads and store [ 0, bp_size ]
	//
	bp_atomics[tid] = tid * bp_size;
	}

	//
	//
	//