// src/compute/skc/platforms/cl_12/runtime_cl_12.c - skia - Git at Google

 /*
  * Copyright 2017 Google Inc.
  *
  * Use of this source code is governed by a BSD-style license that can
  * be found in the LICENSE file.
  *
  */

 //
 //
 //

 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <assert.h>

 //
 //
 //

 #include "context.h"
 #include "block.h"
 #include "grid.h"
 #include "common/cl/assert_cl.h"
 #include "config_cl.h"
 #include "runtime_cl_12.h"
 #include "export_cl_12.h"

 //
 //
 //

 static
 void
 skc_block_pool_create(struct skc_runtime * const runtime, cl_command_queue cq)
 {
   // save size
   runtime->block_pool.size = &runtime->config->block_pool;

   // create block extent
   skc_extent_pdrw_alloc(runtime,
                         &runtime->block_pool.blocks,
                         runtime->block_pool.size->pool_size *
                         runtime->config->block.bytes);

   // allocate block pool ids
   skc_extent_pdrw_alloc(runtime,
                         &runtime->block_pool.ids,
                         runtime->block_pool.size->ring_pow2 * sizeof(skc_uint));

   // allocate block pool atomics
   skc_extent_phr_pdrw_alloc(runtime,
                             &runtime->block_pool.atomics,
                             sizeof(union skc_block_pool_atomic));

   // acquire pool id and atomic initialization kernels
   cl_kernel k0 = skc_device_acquire_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_IDS);
   cl_kernel k1 = skc_device_acquire_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_ATOMICS);

   // init ids
   cl(SetKernelArg(k0,0,sizeof(runtime->block_pool.ids.drw),&runtime->block_pool.ids.drw));
   cl(SetKernelArg(k0,1,SKC_CL_ARG(runtime->block_pool.size->pool_size)));

   // the kernel grid is shaped by the target device -- always 2 for atomics
   skc_device_enqueue_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_IDS,
                             cq,k0,runtime->block_pool.size->pool_size,
                             0,NULL,NULL);

   // init atomics
   cl(SetKernelArg(k1,0,sizeof(runtime->block_pool.atomics.drw),&runtime->block_pool.atomics.drw));
   cl(SetKernelArg(k1,1,SKC_CL_ARG(runtime->block_pool.size->pool_size)));

   // the kernel grid is shaped by the target device
   skc_device_enqueue_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_ATOMICS,
                             cq,k1,2,
                             0,NULL,NULL);

   // kickstart kernel execution
   cl(Flush(cq));

   // release kernels
   cl(ReleaseKernel(k0));
   cl(ReleaseKernel(k1));
 }

 static
 void
 skc_block_pool_dispose(struct skc_runtime * const runtime)
 {
   skc_extent_phr_pdrw_free(runtime,&runtime->block_pool.atomics);
   skc_extent_pdrw_free    (runtime,&runtime->block_pool.ids);
   skc_extent_pdrw_free    (runtime,&runtime->block_pool.blocks);
 }

 //
 //
 //

 static
 bool
 skc_runtime_yield(struct skc_runtime * const runtime)
 {
   return skc_scheduler_yield(runtime->scheduler);
 }

 static
 void
 skc_runtime_wait(struct skc_runtime * const runtime)
 {
   skc_scheduler_wait(runtime->scheduler);
 }

 //
 //
 //

 skc_err
 skc_runtime_cl_12_create(struct skc_context * const context,
                          cl_context                 context_cl,
                          cl_device_id               device_id_cl)
 {
   // allocate the runtime
   struct skc_runtime * const runtime = malloc(sizeof(*runtime));

   // save off CL objects
   runtime->cl.context   = context_cl;
   runtime->cl.device_id = device_id_cl;

   // query device alignment
   cl_uint align_bits;

   cl(GetDeviceInfo(device_id_cl,
                    CL_DEVICE_MEM_BASE_ADDR_ALIGN,
                    sizeof(align_bits),
                    &align_bits,
                    NULL));

   runtime->cl.align_bytes = align_bits / 8;

   // create device
   skc_device_create(runtime);

   // create the host and device allocators
   skc_allocator_host_create(runtime);
   skc_allocator_device_create(runtime);

   // how many slots in the scheduler?
   runtime->scheduler = skc_scheduler_create(runtime,runtime->config->scheduler.size);

   // allocate deps structure
   runtime->deps      = skc_grid_deps_create(runtime,
                                             runtime->scheduler,
                                             runtime->config->block_pool.pool_size);

   // initialize cq pool
   skc_cq_pool_create(runtime,
                      &runtime->cq_pool,
                      runtime->config->cq_pool.cq_props,
                      runtime->config->cq_pool.size);

   // acquire in-order cq
   cl_command_queue cq = skc_runtime_acquire_cq_in_order(runtime);

   // initialize block pool
   skc_block_pool_create(runtime,cq);

   // intialize handle pool
   skc_handle_pool_create(runtime,
                          &runtime->handle_pool,
                          runtime->config->handle_pool.size,
                          runtime->config->handle_pool.width,
                          runtime->config->handle_pool.recs);

   //
   // initialize pfns
   //
   // FIXME -- at this point we will have identified which device we've
   // targeted and will load a DLL (or select from a built-in library)
   // that contains all the pfns.
   //
   context->runtime        = runtime;

   context->yield          = skc_runtime_yield;
   context->wait           = skc_runtime_wait;

   context->path_builder   = skc_path_builder_cl_12_create;
   context->path_retain    = skc_runtime_path_host_retain;
   context->path_release   = skc_runtime_path_host_release;
   context->path_flush     = skc_runtime_path_host_flush;

   context->raster_builder = skc_raster_builder_cl_12_create;
   context->raster_retain  = skc_runtime_raster_host_retain;
   context->raster_release = skc_runtime_raster_host_release;
   context->raster_flush   = skc_runtime_raster_host_flush;

   context->composition    = skc_composition_cl_12_create;
   context->styling        = skc_styling_cl_12_create;

   context->surface        = skc_surface_cl_12_create;

   // block on pool creation
   cl(Finish(cq));

   // dispose of in-order cq
   skc_runtime_release_cq_in_order(runtime,cq);

   return SKC_ERR_SUCCESS;
 };

 //
 //
 //

 skc_err
 skc_runtime_cl_12_dispose(struct skc_context * const context)
 {
   //
   // FIXME -- incomplete
   //
   fprintf(stderr,"%s incomplete!\n",__func__);

   struct skc_runtime * runtime = context->runtime;

   skc_allocator_device_dispose(runtime);
   skc_allocator_host_dispose(runtime);

   skc_scheduler_dispose(context->runtime,context->runtime->scheduler);

   skc_grid_deps_dispose(context->runtime->deps);

   skc_cq_pool_dispose(runtime,&runtime->cq_pool);

   skc_block_pool_dispose(context->runtime);

   // skc_handle_pool_dispose(context->runtime);

   return SKC_ERR_SUCCESS;
 }

 //
 // REPORT BLOCK POOL ALLOCATION
 //

 void
 skc_runtime_cl_12_debug(struct skc_context * const context)
 {
   struct skc_runtime * const runtime = context->runtime;

   // acquire out-of-order cq
   cl_command_queue cq = skc_runtime_acquire_cq_in_order(runtime);

   // copy atomics to host
   skc_extent_phr_pdrw_read(&runtime->block_pool.atomics,cq,NULL);

   // block until complete
   cl(Finish(cq));

   // dispose of out-of-order cq
   skc_runtime_release_cq_in_order(runtime,cq);

   union skc_block_pool_atomic const * const bp_atomic = runtime->block_pool.atomics.hr;

   skc_uint const available = bp_atomic->writes - bp_atomic->reads;
   skc_uint const inuse     = runtime->config->block_pool.pool_size - available;

   fprintf(stderr,
           "writes/reads/avail/alloc: %9u / %9u / %9u = %6.2f MB / %9u = %6.2f MB\n",
           bp_atomic->writes,
           bp_atomic->reads,
           available,
           (available * runtime->config->block.bytes) / (1024.0*1024.0),
           inuse,
           (inuse     * runtime->config->block.bytes) / (1024.0*1024.0));
 }

 //
 //
 //
	/*
	* Copyright 2017 Google Inc.
	*
	* Use of this source code is governed by a BSD-style license that can
	* be found in the LICENSE file.
	*
	*/

	//
	//
	//

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <assert.h>

	//
	//
	//

	#include "context.h"
	#include "block.h"
	#include "grid.h"
	#include "common/cl/assert_cl.h"
	#include "config_cl.h"
	#include "runtime_cl_12.h"
	#include "export_cl_12.h"

	//
	//
	//

	static
	void
	skc_block_pool_create(struct skc_runtime * const runtime, cl_command_queue cq)
	{
	// save size
	runtime->block_pool.size = &runtime->config->block_pool;

	// create block extent
	skc_extent_pdrw_alloc(runtime,
	&runtime->block_pool.blocks,
	runtime->block_pool.size->pool_size *
	runtime->config->block.bytes);

	// allocate block pool ids
	skc_extent_pdrw_alloc(runtime,
	&runtime->block_pool.ids,
	runtime->block_pool.size->ring_pow2 * sizeof(skc_uint));

	// allocate block pool atomics
	skc_extent_phr_pdrw_alloc(runtime,
	&runtime->block_pool.atomics,
	sizeof(union skc_block_pool_atomic));

	// acquire pool id and atomic initialization kernels
	cl_kernel k0 = skc_device_acquire_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_IDS);
	cl_kernel k1 = skc_device_acquire_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_ATOMICS);

	// init ids
	cl(SetKernelArg(k0,0,sizeof(runtime->block_pool.ids.drw),&runtime->block_pool.ids.drw));
	cl(SetKernelArg(k0,1,SKC_CL_ARG(runtime->block_pool.size->pool_size)));

	// the kernel grid is shaped by the target device -- always 2 for atomics
	skc_device_enqueue_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_IDS,
	cq,k0,runtime->block_pool.size->pool_size,
	0,NULL,NULL);

	// init atomics
	cl(SetKernelArg(k1,0,sizeof(runtime->block_pool.atomics.drw),&runtime->block_pool.atomics.drw));
	cl(SetKernelArg(k1,1,SKC_CL_ARG(runtime->block_pool.size->pool_size)));

	// the kernel grid is shaped by the target device
	skc_device_enqueue_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_ATOMICS,
	cq,k1,2,
	0,NULL,NULL);

	// kickstart kernel execution
	cl(Flush(cq));

	// release kernels
	cl(ReleaseKernel(k0));
	cl(ReleaseKernel(k1));
	}

	static
	void
	skc_block_pool_dispose(struct skc_runtime * const runtime)
	{
	skc_extent_phr_pdrw_free(runtime,&runtime->block_pool.atomics);
	skc_extent_pdrw_free (runtime,&runtime->block_pool.ids);
	skc_extent_pdrw_free (runtime,&runtime->block_pool.blocks);
	}

	//
	//
	//

	static
	bool
	skc_runtime_yield(struct skc_runtime * const runtime)
	{
	return skc_scheduler_yield(runtime->scheduler);
	}

	static
	void
	skc_runtime_wait(struct skc_runtime * const runtime)
	{
	skc_scheduler_wait(runtime->scheduler);
	}

	//
	//
	//

	skc_err
	skc_runtime_cl_12_create(struct skc_context * const context,
	cl_context context_cl,
	cl_device_id device_id_cl)
	{
	// allocate the runtime
	struct skc_runtime * const runtime = malloc(sizeof(*runtime));

	// save off CL objects
	runtime->cl.context = context_cl;
	runtime->cl.device_id = device_id_cl;

	// query device alignment
	cl_uint align_bits;

	cl(GetDeviceInfo(device_id_cl,
	CL_DEVICE_MEM_BASE_ADDR_ALIGN,
	sizeof(align_bits),
	&align_bits,
	NULL));

	runtime->cl.align_bytes = align_bits / 8;

	// create device
	skc_device_create(runtime);

	// create the host and device allocators
	skc_allocator_host_create(runtime);
	skc_allocator_device_create(runtime);

	// how many slots in the scheduler?
	runtime->scheduler = skc_scheduler_create(runtime,runtime->config->scheduler.size);

	// allocate deps structure
	runtime->deps = skc_grid_deps_create(runtime,
	runtime->scheduler,
	runtime->config->block_pool.pool_size);

	// initialize cq pool
	skc_cq_pool_create(runtime,
	&runtime->cq_pool,
	runtime->config->cq_pool.cq_props,
	runtime->config->cq_pool.size);

	// acquire in-order cq
	cl_command_queue cq = skc_runtime_acquire_cq_in_order(runtime);

	// initialize block pool
	skc_block_pool_create(runtime,cq);

	// intialize handle pool
	skc_handle_pool_create(runtime,
	&runtime->handle_pool,
	runtime->config->handle_pool.size,
	runtime->config->handle_pool.width,
	runtime->config->handle_pool.recs);

	//
	// initialize pfns
	//
	// FIXME -- at this point we will have identified which device we've
	// targeted and will load a DLL (or select from a built-in library)
	// that contains all the pfns.
	//
	context->runtime = runtime;

	context->yield = skc_runtime_yield;
	context->wait = skc_runtime_wait;

	context->path_builder = skc_path_builder_cl_12_create;
	context->path_retain = skc_runtime_path_host_retain;
	context->path_release = skc_runtime_path_host_release;
	context->path_flush = skc_runtime_path_host_flush;

	context->raster_builder = skc_raster_builder_cl_12_create;
	context->raster_retain = skc_runtime_raster_host_retain;
	context->raster_release = skc_runtime_raster_host_release;
	context->raster_flush = skc_runtime_raster_host_flush;

	context->composition = skc_composition_cl_12_create;
	context->styling = skc_styling_cl_12_create;

	context->surface = skc_surface_cl_12_create;

	// block on pool creation
	cl(Finish(cq));

	// dispose of in-order cq
	skc_runtime_release_cq_in_order(runtime,cq);

	return SKC_ERR_SUCCESS;
	};

	//
	//
	//

	skc_err
	skc_runtime_cl_12_dispose(struct skc_context * const context)
	{
	//
	// FIXME -- incomplete
	//
	fprintf(stderr,"%s incomplete!\n",__func__);

	struct skc_runtime * runtime = context->runtime;

	skc_allocator_device_dispose(runtime);
	skc_allocator_host_dispose(runtime);

	skc_scheduler_dispose(context->runtime,context->runtime->scheduler);

	skc_grid_deps_dispose(context->runtime->deps);

	skc_cq_pool_dispose(runtime,&runtime->cq_pool);

	skc_block_pool_dispose(context->runtime);

	// skc_handle_pool_dispose(context->runtime);

	return SKC_ERR_SUCCESS;
	}

	//
	// REPORT BLOCK POOL ALLOCATION
	//

	void
	skc_runtime_cl_12_debug(struct skc_context * const context)
	{
	struct skc_runtime * const runtime = context->runtime;

	// acquire out-of-order cq
	cl_command_queue cq = skc_runtime_acquire_cq_in_order(runtime);

	// copy atomics to host
	skc_extent_phr_pdrw_read(&runtime->block_pool.atomics,cq,NULL);

	// block until complete
	cl(Finish(cq));

	// dispose of out-of-order cq
	skc_runtime_release_cq_in_order(runtime,cq);

	union skc_block_pool_atomic const * const bp_atomic = runtime->block_pool.atomics.hr;

	skc_uint const available = bp_atomic->writes - bp_atomic->reads;
	skc_uint const inuse = runtime->config->block_pool.pool_size - available;

	fprintf(stderr,
	"writes/reads/avail/alloc: %9u / %9u / %9u = %6.2f MB / %9u = %6.2f MB\n",
	bp_atomic->writes,
	bp_atomic->reads,
	available,
	(available * runtime->config->block.bytes) / (1024.0*1024.0),
	inuse,
	(inuse * runtime->config->block.bytes) / (1024.0*1024.0));
	}

	//
	//
	//