/*
 * Copyright 2018 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can
 * be found in the LICENSE file.
 *
 */
//
//
//
#include <assert.h>
#include <memory.h>
#include "runtime_cl_12.h"
#include "scheduler.h"
//
//
//
#ifndef NDEBUG

#include <stdio.h>

#define SKC_SUBALLOCATOR_DEBUG_ALLOC(suballocator,subbuf_id,ss)        \
  fprintf(stderr,                                                      \
          "suballocator %s : [ %4u ] : alloc( %9u ) @ %4u = %u\n",     \
          suballocator->name,                                          \
          suballocator->rem.avail,                                     \
          (skc_uint)ss,                                                \
          subbuf_id,                                                   \
          (skc_uint)suballocator->total);

#define SKC_SUBALLOCATOR_DEBUG_FREE(suballocator,subbuf_id,ss)         \
  fprintf(stderr,                                                      \
          "suballocator %s : [ %4u ] : free ( %9u ) @ %4u = %u\n",     \
          suballocator->name,                                          \
          suballocator->rem.avail,                                     \
          (skc_uint)ss,                                                \
          subbuf_id,                                                   \
          (skc_uint)suballocator->total);

#else

#define SKC_SUBALLOCATOR_DEBUG_ALLOC(suballocator,subbuf_id,ss)
#define SKC_SUBALLOCATOR_DEBUG_FREE(suballocator,subbuf_id,ss)

#endif
//
//
//
void
skc_suballocator_create(struct skc_runtime      * const runtime,
                        struct skc_suballocator * const suballocator,
                        char const              * const name,
                        skc_uint                  const subbufs,
                        size_t                    const align,
                        size_t                    const size)
{
  size_t const subbufs_size = sizeof(*suballocator->subbufs) * subbufs;

  // allocate array of subbuf records
  suballocator->subbufs = skc_runtime_host_perm_alloc(runtime,SKC_MEM_FLAGS_READ_WRITE,subbufs_size);

  // zero subbufs
  memset(suballocator->subbufs,0,subbufs_size);

  // initialize starting subbuf
  suballocator->subbufs[0].size = (skc_subbuf_size_t)size;

  // allocate array of ids
  suballocator->ids = skc_runtime_host_perm_alloc(runtime,
                                                  SKC_MEM_FLAGS_READ_WRITE,
                                                  sizeof(*suballocator->ids) * subbufs);
  for (skc_uint ii=0; ii<subbufs; ii++)
    suballocator->ids[ii] = ii;

  suballocator->rem.avail = 1;
  suballocator->rem.spare = subbufs - 1;

  suballocator->align = (skc_uint)align;
  suballocator->count = subbufs;

  suballocator->size  = (skc_subbuf_size_t)size;
  suballocator->total = 0;

  suballocator->name  = name;
}

void
skc_suballocator_dispose(struct skc_runtime      * const runtime,
                         struct skc_suballocator * const suballocator)
{
  skc_runtime_host_perm_free(runtime,suballocator->ids);
  skc_runtime_host_perm_free(runtime,suballocator->subbufs);
}
//
// Sets id and returns origin
//
size_t
skc_suballocator_subbuf_alloc(struct skc_suballocator * const suballocator,
                              struct skc_scheduler    * const scheduler,
                              size_t                    const size,
                              skc_subbuf_id_t         * const subbuf_id,
                              size_t                  * const subbuf_size)
{
  //
  // Note that we can't deadlock here because everything allocated is
  // expected to be freed within msecs.  Worst case, we wait for
  // availability of resources while a fully utilized GPU is making
  // forward progress on kernels.
  //
  // This behavior should guide the sizing of the suballocator's
  // number of subbuffers and extent.
  //
  // We want to allocate a large enough extent and enough subbuffer
  // records so that the CPU/GPU is never starved.
  //
  // round up the size
  skc_subbuf_size_t const size_ru = (skc_subbuf_size_t)SKC_ROUND_UP_POW2(size,suballocator->align);
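
  // e.g. with a pow2 alignment of 256, a request for 1000 bytes rounds up to 1024
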
  // save it
  if (subbuf_size != NULL)
    *subbuf_size = size_ru;

  //
  // We precheck to see that there is at least one region of memory
  // available but do not check to see if there is a spare.  Instead,
  // we simply keep looking for an exact fit.
  //
  skc_subbuf_id_t * const ids = suballocator->ids;

  while (true)
    {
      skc_uint avail_rem = suballocator->rem.avail;
      skc_uint spare_rem = suballocator->rem.spare;

      for (skc_uint avail_idx=0; avail_idx<avail_rem; avail_idx++)
        {
          skc_subbuf_id_t     const avail_id = ids[avail_idx];
          struct skc_subbuf * const avail    = suballocator->subbufs + avail_id;

          assert(avail->inuse == 0);

          if (avail->size == size_ru) // size matches exactly
            {
              suballocator->total += size_ru;

              // return this id
              *subbuf_id = avail_id;

              SKC_SUBALLOCATOR_DEBUG_ALLOC(suballocator,avail_id,size_ru);

              // mark the subbuffer as in use
              avail->inuse += 1;

              assert(avail->inuse == 1);

              // update rem avail count
              suballocator->rem.avail = --avail_rem;

              // replace now inuse id with last avail id
              if ((avail_rem > 0) && (avail_idx != avail_rem))
                {
                  skc_subbuf_id_t     const last_id = ids[avail_rem];
                  struct skc_subbuf * const last    = suballocator->subbufs + last_id;

                  ids[avail_idx] = last_id;   // move id
                  last->idx      = avail_idx; // update idx[]
                }

              assert(suballocator->rem.avail > 0);

              // return origin
              return avail->origin;
            }
          else if ((avail->size > size_ru) && (spare_rem > 0)) // requested is less than available so split it
            {
              suballocator->total += size_ru;

              skc_uint                  spare_idx = suballocator->count - spare_rem;
              skc_subbuf_id_t     const spare_id  = ids[spare_idx];
              struct skc_subbuf * const spare     = suballocator->subbufs + spare_id;

              assert(spare->inuse == 0);

              // simple -- we're popping the top-of-stack of spares
              suballocator->rem.spare -= 1;

              // return id
              *subbuf_id = spare_id;

              SKC_SUBALLOCATOR_DEBUG_ALLOC(suballocator,spare_id,size_ru);

              // get prev
              struct skc_subbuf * const prev = avail->prev;

              if (prev != NULL)
                prev->next = spare;

              // init spare
              spare->prev    = prev;
              spare->next    = avail;
              spare->size    = size_ru;
              spare->origin  = avail->origin;
              spare->idx     = SKC_UINT_MAX; // defensive
              spare->inuse  += 1;

              // update curr
              avail->prev    = spare;
              avail->size   -= size_ru;
              avail->origin += size_ru;

              assert(suballocator->rem.avail > 0);

              return spare->origin;
            }
        }

      // uh oh... couldn't find enough memory
      skc_scheduler_wait(scheduler);
    }
}
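
//
// The comment at the top of skc_suballocator_subbuf_alloc() describes
// the intended usage pattern: short-lived allocations that are freed
// within milliseconds, with alloc() blocking in skc_scheduler_wait()
// until a fit appears.  The block below is only an illustrative sketch
// of that lifecycle -- the "TEMP" name, the sizing constants and the
// runtime->scheduler field are assumptions, not code from this runtime.
//
#if 0
static
void
skc_example_suballocator_lifecycle(struct skc_runtime * const runtime)
{
  struct skc_suballocator suballocator;

  // hypothetical sizing: 128 records over a 64 MB extent, 256-byte alignment
  skc_suballocator_create(runtime,&suballocator,"TEMP",128,256,64ul << 20);

  skc_subbuf_id_t id;
  size_t          size_ru;

  // may spin in skc_scheduler_wait() until enough memory is free
  size_t const origin = skc_suballocator_subbuf_alloc(&suballocator,
                                                      runtime->scheduler,
                                                      3 * 1024 * 1024,
                                                      &id,
                                                      &size_ru);

  // ... [origin, origin + size_ru) is an offset range into the backing extent ...

  // free promptly so other waiters can make forward progress
  skc_suballocator_subbuf_free(&suballocator,id);

  skc_suballocator_dispose(runtime,&suballocator);
}
#endif
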
//
// FIXME -- simplify this with a merge-with-prev() primitive
//
void
skc_suballocator_subbuf_free(struct skc_suballocator * const suballocator,
                             skc_subbuf_id_t                 subbuf_id)
{
  // get subbuf for id
  struct skc_subbuf * const subbuf = suballocator->subbufs + subbuf_id;

  assert(subbuf->inuse == 1);

  suballocator->total -= subbuf->size;

  SKC_SUBALLOCATOR_DEBUG_FREE(suballocator,subbuf_id,subbuf->size);

  //
  // try to merge subbuf with left and maybe right and then dispose
  //
  struct skc_subbuf * prev;
  struct skc_subbuf * next;

  if (((prev = subbuf->prev) != NULL) && !prev->inuse)
    {
      next = subbuf->next;

      if ((next != NULL) && !next->inuse)
        {
          subbuf->inuse -= 1;

          assert(next->inuse == 0);

          // increment size
          prev->size += (subbuf->size + next->size);

          struct skc_subbuf * const nextnext = next->next;

          // update next link
          prev->next = nextnext;

          // update prev link
          if (nextnext != NULL)
            nextnext->prev = prev;
          //
          // both subbuf and next are now spare which means we need to
          // move the final available subbuffer into next's old position
          // unless they're the same
          //
          skc_uint const last_idx = --suballocator->rem.avail;
          skc_uint const next_idx = next->idx;

          assert(suballocator->rem.avail > 0);

          if (last_idx != next_idx)
            {
              skc_subbuf_id_t     const last_id = suballocator->ids[last_idx];
              struct skc_subbuf * const last    = suballocator->subbufs + last_id;

              suballocator->ids[next_idx] = last_id;
              last->idx                   = next_idx;
            }

          skc_subbuf_id_t const next_id   = (skc_subbuf_id_t)(next - suballocator->subbufs);

          skc_uint        const spare_rem = suballocator->rem.spare + 2;
          skc_uint        const spare_idx = suballocator->count - spare_rem;

          suballocator->rem.spare = spare_rem;

          suballocator->ids[spare_idx + 0] = subbuf_id;
          suballocator->ids[spare_idx + 1] = next_id;
        }
      else
        {
          prev->size += subbuf->size;
          prev->next  = next;

          if (next != NULL)
            next->prev = prev;

          subbuf->inuse -= 1;

          assert(subbuf->inuse == 0);
          assert(suballocator->rem.avail > 0);

          suballocator->ids[suballocator->count - ++suballocator->rem.spare] = subbuf_id;
        }
    }
  //
  // try to merge with right
  //
  else if (((next = subbuf->next) != NULL) && !next->inuse)
    {
      subbuf->inuse -= 1;

      assert(subbuf->inuse == 0);
      assert(suballocator->rem.avail > 0);

      next->prev    = prev;
      next->origin  = subbuf->origin;
      next->size   += subbuf->size;

      if (prev != NULL)
        prev->next = next;

      // subbuf is now spare
      suballocator->ids[suballocator->count - ++suballocator->rem.spare] = subbuf_id;
    }
  else // couldn't merge with a neighbor
    {
      skc_uint avail_idx = suballocator->rem.avail++;

      // subbuf is now available
      subbuf->idx    = avail_idx;
      subbuf->inuse -= 1;

      assert(subbuf->inuse == 0);
      assert(suballocator->rem.avail > 0);

      suballocator->ids[avail_idx] = subbuf_id;
    }
}
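
//
// One possible shape for the merge-with-prev() primitive mentioned in
// the FIXME above: fold a freed subbuf into its (already free)
// predecessor and push the freed id onto the spare stack.  This is a
// sketch only -- the helper name is hypothetical and it assumes the
// caller has already checked prev != NULL && !prev->inuse.
//
#if 0
static
void
skc_subbuf_merge_with_prev(struct skc_suballocator * const suballocator,
                           struct skc_subbuf       * const prev,
                           struct skc_subbuf       * const subbuf,
                           skc_subbuf_id_t           const subbuf_id)
{
  struct skc_subbuf * const next = subbuf->next;

  // grow prev and splice subbuf out of the doubly-linked list
  prev->size += subbuf->size;
  prev->next  = next;

  if (next != NULL)
    next->prev = prev;

  // subbuf becomes a spare
  subbuf->inuse -= 1;

  suballocator->ids[suballocator->count - ++suballocator->rem.spare] = subbuf_id;
}
#endif
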
//
//
//
#if 0

//
// At some point there might be a reason to sort the available
// subbuffers into some useful order -- presumably to binary search
// for the closest match or to chip away at the largest available
// subbuffer
//
static
void
skc_suballocator_optimize(struct skc_suballocator * const suballocator)
{
  ;
}

#endif
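
//
// A hypothetical sketch of what such an ordering could look like: sort
// the available ids by ascending subbuffer size so a best-fit (or
// binary) search becomes possible.  The comparator, the file-scope
// context pointer and the use of qsort() (which would require
// <stdlib.h>) are assumptions for illustration, not part of this
// runtime.
//
#if 0
static struct skc_suballocator * skc_sort_ctx; // qsort() has no user-context argument

static
int
skc_subbuf_size_cmp(void const * a, void const * b)
{
  skc_subbuf_size_t const sa = skc_sort_ctx->subbufs[*(skc_subbuf_id_t const *)a].size;
  skc_subbuf_size_t const sb = skc_sort_ctx->subbufs[*(skc_subbuf_id_t const *)b].size;

  return (sa > sb) - (sa < sb);
}

static
void
skc_suballocator_sort_avail(struct skc_suballocator * const suballocator)
{
  skc_sort_ctx = suballocator;

  // order ids[0..rem.avail) by ascending subbuffer size
  qsort(suballocator->ids,
        suballocator->rem.avail,
        sizeof(*suballocator->ids),
        skc_subbuf_size_cmp);

  // rebuild each available subbuf's back-pointer into ids[]
  for (skc_uint ii=0; ii<suballocator->rem.avail; ii++)
    suballocator->subbufs[suballocator->ids[ii]].idx = ii;
}
#endif
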
//
//
//