/*
 * Copyright 2018 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can
 * be found in the LICENSE file.
 *
 */
//
//
//
#include <assert.h>
#include <memory.h>
#include "runtime_cl_12.h"
#include "scheduler.h"
//
//
//
#ifndef NDEBUG

#include <stdio.h>

#define SKC_SUBALLOCATOR_DEBUG_ALLOC(suballocator,subbuf_id,ss)        \
  fprintf(stderr,                                                      \
          "suballocator %s : [ %4u ] : alloc( %9u ) @ %4u = %u\n",     \
          suballocator->name,                                          \
          suballocator->rem.avail,                                     \
          (skc_uint)ss,                                                \
          subbuf_id,                                                   \
          (skc_uint)suballocator->total);

#define SKC_SUBALLOCATOR_DEBUG_FREE(suballocator,subbuf_id,ss)         \
  fprintf(stderr,                                                      \
          "suballocator %s : [ %4u ] : free ( %9u ) @ %4u = %u\n",     \
          suballocator->name,                                          \
          suballocator->rem.avail,                                     \
          (skc_uint)ss,                                                \
          subbuf_id,                                                   \
          (skc_uint)suballocator->total);

#else

#define SKC_SUBALLOCATOR_DEBUG_ALLOC(suballocator,subbuf_id,ss)
#define SKC_SUBALLOCATOR_DEBUG_FREE(suballocator,subbuf_id,ss)

#endif
//
//
//
void
skc_suballocator_create(struct skc_runtime      * const runtime,
                        struct skc_suballocator * const suballocator,
                        char const              * const name,
                        skc_uint                  const subbufs,
                        size_t                    const align,
                        size_t                    const size)
{
  size_t const subbufs_size = sizeof(*suballocator->subbufs) * subbufs;

  // allocate array of subbuf records
  suballocator->subbufs = skc_runtime_host_perm_alloc(runtime,SKC_MEM_FLAGS_READ_WRITE,subbufs_size);

  // zero subbufs
  memset(suballocator->subbufs,0,subbufs_size);

  // initialize starting subbuf
  suballocator->subbufs[0].size = (skc_subbuf_size_t)size;

  // allocate array of ids
  suballocator->ids = skc_runtime_host_perm_alloc(runtime,
                                                  SKC_MEM_FLAGS_READ_WRITE,
                                                  sizeof(*suballocator->ids) * subbufs);
  for (skc_uint ii=0; ii<subbufs; ii++)
    suballocator->ids[ii] = ii;

  suballocator->rem.avail = 1;
  suballocator->rem.spare = subbufs - 1;

  suballocator->align = (skc_uint)align;
  suballocator->count = subbufs;

  suballocator->size  = (skc_subbuf_size_t)size;
  suballocator->total = 0;

  suballocator->name  = name;
}

void
skc_suballocator_dispose(struct skc_runtime      * const runtime,
                         struct skc_suballocator * const suballocator)
{
  skc_runtime_host_perm_free(runtime,suballocator->ids);
  skc_runtime_host_perm_free(runtime,suballocator->subbufs);
}
//
// Sets id and returns origin
//
size_t
skc_suballocator_subbuf_alloc(struct skc_suballocator * const suballocator,
                              struct skc_scheduler    * const scheduler,
                              size_t                    const size,
                              skc_subbuf_id_t         * const subbuf_id,
                              size_t                  * const subbuf_size)
{
  //
  // Note that we can't deadlock here because everything allocated is
  // expected to be freed within msecs.  Worst case, we wait for
  // availability of resources while a fully utilized GPU is making
  // forward progress on kernels.
  //
  // This behavior should guide the sizing of the suballocator's
  // number of subbuffers and extent.
  //
  // We want to allocate a large enough extent and enough subbuffer
  // records so that the CPU/GPU is never starved.
  //
  // round up the size
  skc_subbuf_size_t const size_ru = (skc_subbuf_size_t)SKC_ROUND_UP_POW2(size,suballocator->align);
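
  // e.g. with a pow2 alignment of 256, a request for 1000 bytes rounds up to 1024
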
  // save it
  if (subbuf_size != NULL)
    *subbuf_size = size_ru;

  //
  // We precheck to see that there is at least one region of memory
  // available but do not check to see if there is a spare.  Instead,
  // we simply keep looking for an exact fit.
  //
  skc_subbuf_id_t * const ids = suballocator->ids;

  while (true)
    {
      skc_uint avail_rem = suballocator->rem.avail;
      skc_uint spare_rem = suballocator->rem.spare;

      for (skc_uint avail_idx=0; avail_idx<avail_rem; avail_idx++)
        {
          skc_subbuf_id_t     const avail_id = ids[avail_idx];
          struct skc_subbuf * const avail    = suballocator->subbufs + avail_id;

          assert(avail->inuse == 0);

          if (avail->size == size_ru) // size matches exactly
            {
              suballocator->total += size_ru;

              // return this id
              *subbuf_id = avail_id;

              SKC_SUBALLOCATOR_DEBUG_ALLOC(suballocator,avail_id,size_ru);

              // mark the subbuffer as in use
              avail->inuse += 1;

              assert(avail->inuse == 1);

              // update rem avail count
              suballocator->rem.avail = --avail_rem;

              // replace now inuse id with last avail id
              if ((avail_rem > 0) && (avail_idx != avail_rem))
                {
                  skc_subbuf_id_t     const last_id = ids[avail_rem];
                  struct skc_subbuf * const last    = suballocator->subbufs + last_id;

                  ids[avail_idx] = last_id;   // move id
                  last->idx      = avail_idx; // update idx[]
                }

              assert(suballocator->rem.avail > 0);

              // return origin
              return avail->origin;
            }
          else if ((avail->size > size_ru) && (spare_rem > 0)) // requested is less than available so split it
            {
              suballocator->total += size_ru;

              skc_uint                  spare_idx = suballocator->count - spare_rem;
              skc_subbuf_id_t     const spare_id  = ids[spare_idx];
              struct skc_subbuf * const spare     = suballocator->subbufs + spare_id;

              assert(spare->inuse == 0);

              // simple -- we're popping the top-of-stack of spares
              suballocator->rem.spare -= 1;

              // return id
              *subbuf_id = spare_id;

              SKC_SUBALLOCATOR_DEBUG_ALLOC(suballocator,spare_id,size_ru);

              // get prev
              struct skc_subbuf * const prev = avail->prev;

              if (prev != NULL)
                prev->next = spare;

              // init spare
              spare->prev    = prev;
              spare->next    = avail;
              spare->size    = size_ru;
              spare->origin  = avail->origin;
              spare->idx     = SKC_UINT_MAX; // defensive
              spare->inuse  += 1;

              // update curr
              avail->prev    = spare;
              avail->size   -= size_ru;
              avail->origin += size_ru;

              assert(suballocator->rem.avail > 0);

              return spare->origin;
            }
        }

      // uh oh... couldn't find enough memory
      skc_scheduler_wait(scheduler);
    }
}
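
//
// The comment at the top of skc_suballocator_subbuf_alloc() describes
// the intended usage pattern: short-lived allocations that are freed
// within milliseconds, with alloc() blocking in skc_scheduler_wait()
// until a fit appears.  The block below is only an illustrative sketch
// of that lifecycle -- the "TEMP" name, the sizing constants and the
// runtime->scheduler field are assumptions, not code from this runtime.
//
#if 0
static
void
skc_example_suballocator_lifecycle(struct skc_runtime * const runtime)
{
  struct skc_suballocator suballocator;

  // hypothetical sizing: 128 records over a 64 MB extent, 256-byte alignment
  skc_suballocator_create(runtime,&suballocator,"TEMP",128,256,64ul << 20);

  skc_subbuf_id_t id;
  size_t          size_ru;

  // may spin in skc_scheduler_wait() until enough memory is free
  size_t const origin = skc_suballocator_subbuf_alloc(&suballocator,
                                                      runtime->scheduler,
                                                      3 * 1024 * 1024,
                                                      &id,
                                                      &size_ru);

  // ... [origin, origin + size_ru) is an offset range into the backing extent ...

  // free promptly so other waiters can make forward progress
  skc_suballocator_subbuf_free(&suballocator,id);

  skc_suballocator_dispose(runtime,&suballocator);
}
#endif
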
//
// FIXME -- simplify this with a merge-with-prev() primitive
//
void
skc_suballocator_subbuf_free(struct skc_suballocator * const suballocator,
                             skc_subbuf_id_t                 subbuf_id)
{
  // get subbuf for id
  struct skc_subbuf * const subbuf = suballocator->subbufs + subbuf_id;

  assert(subbuf->inuse == 1);

  suballocator->total -= subbuf->size;

  SKC_SUBALLOCATOR_DEBUG_FREE(suballocator,subbuf_id,subbuf->size);

  //
  // try to merge subbuf with left and maybe right and then dispose
  //
  struct skc_subbuf * prev;
  struct skc_subbuf * next;

  if (((prev = subbuf->prev) != NULL) && !prev->inuse)
    {
      next = subbuf->next;

      if ((next != NULL) && !next->inuse)
        {
          subbuf->inuse -= 1;

          assert(next->inuse == 0);

          // increment size
          prev->size += (subbuf->size + next->size);

          struct skc_subbuf * const nextnext = next->next;

          // update next link
          prev->next = nextnext;

          // update prev link
          if (nextnext != NULL)
            nextnext->prev = prev;
          //
          // both subbuf and next are now spare which means we need to
          // move the final available subbuffer into next's old position
          // unless they're the same
          //
          skc_uint const last_idx = --suballocator->rem.avail;
          skc_uint const next_idx = next->idx;

          assert(suballocator->rem.avail > 0);

          if (last_idx != next_idx)
            {
              skc_subbuf_id_t     const last_id = suballocator->ids[last_idx];
              struct skc_subbuf * const last    = suballocator->subbufs + last_id;

              suballocator->ids[next_idx] = last_id;
              last->idx                   = next_idx;
            }

          skc_subbuf_id_t const next_id   = (skc_subbuf_id_t)(next - suballocator->subbufs);

          skc_uint        const spare_rem = suballocator->rem.spare + 2;
          skc_uint        const spare_idx = suballocator->count - spare_rem;

          suballocator->rem.spare = spare_rem;

          suballocator->ids[spare_idx + 0] = subbuf_id;
          suballocator->ids[spare_idx + 1] = next_id;
        }
      else
        {
          prev->size += subbuf->size;
          prev->next  = next;

          if (next != NULL)
            next->prev = prev;

          subbuf->inuse -= 1;

          assert(subbuf->inuse == 0);
          assert(suballocator->rem.avail > 0);

          suballocator->ids[suballocator->count - ++suballocator->rem.spare] = subbuf_id;
        }
    }
  //
  // try to merge with right
  //
  else if (((next = subbuf->next) != NULL) && !next->inuse)
    {
      subbuf->inuse -= 1;

      assert(subbuf->inuse == 0);
      assert(suballocator->rem.avail > 0);

      next->prev    = prev;
      next->origin  = subbuf->origin;
      next->size   += subbuf->size;

      if (prev != NULL)
        prev->next = next;

      // subbuf is now spare
      suballocator->ids[suballocator->count - ++suballocator->rem.spare] = subbuf_id;
    }
  else // couldn't merge with a neighbor
    {
      skc_uint avail_idx = suballocator->rem.avail++;

      // subbuf is now available
      subbuf->idx    = avail_idx;
      subbuf->inuse -= 1;

      assert(subbuf->inuse == 0);
      assert(suballocator->rem.avail > 0);

      suballocator->ids[avail_idx] = subbuf_id;
    }
}
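
//
// One possible shape for the merge-with-prev() primitive mentioned in
// the FIXME above: fold a freed subbuf into its (already free)
// predecessor and push the freed id onto the spare stack.  This is a
// sketch only -- the helper name is hypothetical and it assumes the
// caller has already checked prev != NULL && !prev->inuse.
//
#if 0
static
void
skc_subbuf_merge_with_prev(struct skc_suballocator * const suballocator,
                           struct skc_subbuf       * const prev,
                           struct skc_subbuf       * const subbuf,
                           skc_subbuf_id_t           const subbuf_id)
{
  struct skc_subbuf * const next = subbuf->next;

  // grow prev and splice subbuf out of the doubly-linked list
  prev->size += subbuf->size;
  prev->next  = next;

  if (next != NULL)
    next->prev = prev;

  // subbuf becomes a spare
  subbuf->inuse -= 1;

  suballocator->ids[suballocator->count - ++suballocator->rem.spare] = subbuf_id;
}
#endif
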
//
//
//
#if 0

//
// At some point there might be a reason to sort the available
// subbuffers into some useful order -- presumably to binary search
// for the closest match or to chip away at the largest available
// subbuffer
//
static
void
skc_suballocator_optimize(struct skc_suballocator * const suballocator)
{
  ;
}

#endif
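
//
// A hypothetical sketch of what such an ordering could look like: sort
// the available ids by ascending subbuffer size so a best-fit (or
// binary) search becomes possible.  The comparator, the file-scope
// context pointer and the use of qsort() (which would require
// <stdlib.h>) are assumptions for illustration, not part of this
// runtime.
//
#if 0
static struct skc_suballocator * skc_sort_ctx; // qsort() has no user-context argument

static
int
skc_subbuf_size_cmp(void const * a, void const * b)
{
  skc_subbuf_size_t const sa = skc_sort_ctx->subbufs[*(skc_subbuf_id_t const *)a].size;
  skc_subbuf_size_t const sb = skc_sort_ctx->subbufs[*(skc_subbuf_id_t const *)b].size;

  return (sa > sb) - (sa < sb);
}

static
void
skc_suballocator_sort_avail(struct skc_suballocator * const suballocator)
{
  skc_sort_ctx = suballocator;

  // order ids[0..rem.avail) by ascending subbuffer size
  qsort(suballocator->ids,
        suballocator->rem.avail,
        sizeof(*suballocator->ids),
        skc_subbuf_size_cmp);

  // rebuild each available subbuf's back-pointer into ids[]
  for (skc_uint ii=0; ii<suballocator->rem.avail; ii++)
    suballocator->subbufs[suballocator->ids[ii]].idx = ii;
}
#endif
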
//
//
//