Add per-thread work-unit, timing, and utilization stats to ddlbench
Change-Id: I62d8cdec7dfe126513e616c9966e2a8eb994d95f
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/373880
Reviewed-by: Adlai Holler <adlai@google.com>
Commit-Queue: Robert Phillips <robertphillips@google.com>
diff --git a/experimental/ddlbench/ddlbench.cpp b/experimental/ddlbench/ddlbench.cpp
index df49fa0..4f701bf 100644
--- a/experimental/ddlbench/ddlbench.cpp
+++ b/experimental/ddlbench/ddlbench.cpp
@@ -14,9 +14,13 @@
#include "src/sksl/SkSLDefines.h"
+#include <chrono>
#include <deque>
#include <thread>
+using hires_clock = std::chrono::high_resolution_clock;
+using duration = std::chrono::nanoseconds;
+
using sk_gpu_test::ContextInfo;
using sk_gpu_test::GrContextFactory;
using sk_gpu_test::TestContext;
@@ -37,29 +41,49 @@
struct ThreadInfo {
ThreadInfo() = default;
- ThreadInfo(const char* name, GrDirectContext* directContext, TestContext* testContext)
- : fDirectContext(directContext)
- , fTestContext(testContext) {
- memcpy(fName, name, 8);
- fName[7] = '\0';
+ ThreadInfo(const SkString& name, GrDirectContext* directContext, TestContext* testContext)
+ : fName(name)
+ , fDirectContext(directContext)
+ , fTestContext(testContext) {
}
- char fName[8] = { '\0' };
+ double elapsedWorkSeconds() const {
+ return std::chrono::duration<double>(fWorkElapsed).count();
+ }
+
+ // Prints this thread's work-unit count, time spent in work, lifetime, and utilization.
+ void dump() const {
+ const double workSeconds = this->elapsedWorkSeconds();
+ const double totalSeconds =
+ std::chrono::duration<double>(fThreadStop - fThreadStart).count();
+ // If the thread never ran (start == stop) report 0% instead of printing NaN/inf.
+ const double utilization = totalSeconds > 0.0 ? 100.0 * workSeconds / totalSeconds : 0.0;
+ // Use %f: %.2g keeps 2 significant digits and prints values >= 100 as e.g. "1e+02".
+ printf("%s: num work units %d work: %.2fs total: %.2fs utilization %.2f%%\n",
+ fName.c_str(), fWorkUnit, workSeconds, totalSeconds, utilization);
+ }
+
+ SkString fName;
// These two can be null on recording/utility threads
- GrDirectContext* fDirectContext = nullptr;
- TestContext* fTestContext = nullptr;
+ GrDirectContext* fDirectContext = nullptr;
+ TestContext* fTestContext = nullptr;
+
+ int fWorkUnit = 0;
+ duration fWorkElapsed {0};
+ hires_clock::time_point fThreadStart;
+ hires_clock::time_point fThreadStop;
};
#if SKSL_USE_THREAD_LOCAL
-static thread_local ThreadInfo gThreadInfo;
+static thread_local ThreadInfo* gThreadInfo;
static ThreadInfo* get_thread_local_info() {
- return &gThreadInfo;
+ return gThreadInfo;
}
-static void set_thread_local_info(const ThreadInfo& threadInfo) {
+static void set_thread_local_info(ThreadInfo* threadInfo) {
gThreadInfo = threadInfo;
}
@@ -83,16 +107,19 @@
return static_cast<ThreadInfo*>(pthread_getspecific(get_pthread_key()));
}
-static void set_thread_local_info(const ThreadInfo& threadInfo) {
- pthread_setspecific(get_pthread_key(), nullptr);
+static void set_thread_local_info(ThreadInfo* threadInfo) {
+ pthread_setspecific(get_pthread_key(), threadInfo);
}
#endif
-static void set_up_context_on_thread(const ThreadInfo& threadInfo) {
- if (threadInfo.fDirectContext) {
- threadInfo.fTestContext->makeCurrent();
+static void set_up_context_on_thread(ThreadInfo* threadInfo) {
+ if (threadInfo->fDirectContext) {
+ threadInfo->fTestContext->makeCurrent();
}
+
+ threadInfo->fThreadStart = hires_clock::now();
+
set_thread_local_info(threadInfo);
}
@@ -100,19 +127,16 @@
// at the start of each thread and some thread_local data to hold the utility context/
class GrThreadPool {
public:
- static std::unique_ptr<GrThreadPool> MakeFIFOThreadPool(int threads,
- std::vector<ThreadInfo>& contexts,
+ static std::unique_ptr<GrThreadPool> MakeFIFOThreadPool(SkSpan<ThreadInfo> threadInfo,
bool allowBorrowing = true) {
- SkASSERT(threads > 0);
- return std::make_unique<GrThreadPool>(threads, contexts, allowBorrowing);
+ return std::make_unique<GrThreadPool>(threadInfo, allowBorrowing);
}
- explicit GrThreadPool(int threads, std::vector<ThreadInfo>& contexts, bool allowBorrowing)
+ explicit GrThreadPool(SkSpan<ThreadInfo> threadInfo, bool allowBorrowing)
: fAllowBorrowing(allowBorrowing) {
- SkASSERT(((int)contexts.size()) >= threads);
- for (int i = 0; i < threads; i++) {
- fThreads.emplace_back(&Loop, this, contexts[i]);
+ for (size_t i = 0; i < threadInfo.size(); i++) {
+ fThreads.emplace_back(&Loop, this, &threadInfo[i]);
}
}
@@ -140,13 +164,13 @@
void borrow() {
// If there is work waiting and we're allowed to borrow work, do it.
if (fAllowBorrowing && fWorkAvailable.try_wait()) {
- SkAssertResult(this->do_work());
+ SkAssertResult(this->do_work(nullptr));
}
}
private:
// This method should be called only when fWorkAvailable indicates there's work to do.
- bool do_work() {
+ bool do_work(ThreadInfo* threadInfo) {
std::function<void(void)> work;
{
SkAutoMutexExclusive lock(fWorkLock);
@@ -156,20 +180,25 @@
}
if (!work) {
+ if (threadInfo) { threadInfo->fThreadStop = hires_clock::now(); }  // null via borrow()
return false; // This is Loop()'s signal to shut down.
}
+ const hires_clock::time_point start = hires_clock::now();
work();
+ if (!threadInfo) { return true; }  // borrow() passes null: no per-thread stats to update
+ threadInfo->fWorkElapsed += hires_clock::now() - start;  // accumulate over all work units
+ threadInfo->fWorkUnit++;
return true;
}
- static void Loop(void* ctx, const ThreadInfo& threadInfo) {
+ static void Loop(void* ctx, ThreadInfo* threadInfo) {
set_up_context_on_thread(threadInfo);
auto pool = (GrThreadPool*)ctx;
do {
pool->fWorkAvailable.wait();
- } while (pool->do_work());
+ } while (pool->do_work(threadInfo));
}
using WorkList = std::deque<std::function<void(void)>>;
@@ -220,26 +249,25 @@
static bool create_contexts(GrContextFactory* factory,
GrContextFactory::ContextType contextType,
const GrContextFactory::ContextOverrides& overrides,
- std::vector<ThreadInfo>* mainContext,
- int numUtilityContexts,
- std::vector<ThreadInfo>* utilityContexts) {
+ ThreadInfo* gpuThread,
+ SkSpan<ThreadInfo> utilityThreads) {
ContextInfo mainInfo = factory->getContextInfo(contextType, overrides);
if (!mainInfo.directContext()) {
exitf("Could not create primary direct context.");
}
- mainContext->push_back({ "g0", mainInfo.directContext(), mainInfo.testContext() });
+ *gpuThread = { SkString("g0"), mainInfo.directContext(), mainInfo.testContext() };
bool allSucceeded = true, allFailed = true;
// Create the utility contexts in a share group with the primary one. This is allowed to fail
// but either they should all work or the should all fail.
- for (int i = 0; i < numUtilityContexts; ++i) {
- SkString name = SkStringPrintf("r%d", i);
+ for (size_t i = 0; i < utilityThreads.size(); ++i) {
+ SkString name = SkStringPrintf("r%zu", i);
ContextInfo tmp = factory->getSharedContextInfo(mainInfo.directContext(), i);
- utilityContexts->push_back({ name.c_str(), tmp.directContext(), tmp.testContext() });
+ utilityThreads[i] = { name, tmp.directContext(), tmp.testContext() };
allSucceeded &= SkToBool(tmp.directContext());
allFailed &= !tmp.directContext();
}
@@ -288,70 +316,74 @@
GrContextFactory factory(kContextOptions);
- std::vector<ThreadInfo> mainContext;
- mainContext.reserve(1);
- std::vector<ThreadInfo> utilityContexts;
- utilityContexts.reserve(FLAGS_ddlNumRecordingThreads);
+ std::unique_ptr<ThreadInfo> mainContext(new ThreadInfo);
+ std::unique_ptr<ThreadInfo[]> utilityContexts(new ThreadInfo[FLAGS_ddlNumRecordingThreads]);
if (!create_contexts(&factory,
kContextType,
kOverrides,
- &mainContext,
- FLAGS_ddlNumRecordingThreads,
- &utilityContexts)) {
+ mainContext.get(),
+ SkSpan<ThreadInfo>(utilityContexts.get(), FLAGS_ddlNumRecordingThreads))) {
return 1;
}
- mainContext.front().fTestContext->makeCurrent();
+ mainContext->fTestContext->makeCurrent();
- SkYUVAPixmapInfo::SupportedDataTypes supportedYUVADTypes(*mainContext.front().fDirectContext);
+ SkYUVAPixmapInfo::SupportedDataTypes supportedYUVADTypes(*mainContext->fDirectContext);
DDLPromiseImageHelper promiseImageHelper(supportedYUVADTypes);
sk_sp<SkPicture> skp = create_shared_skp(FLAGS_src[0],
- mainContext.front().fDirectContext,
+ mainContext->fDirectContext,
&promiseImageHelper);
- promiseImageHelper.createCallbackContexts(mainContext.front().fDirectContext);
+ promiseImageHelper.createCallbackContexts(mainContext->fDirectContext);
// TODO: do this later on a utility thread!
- promiseImageHelper.uploadAllToGPU(nullptr, mainContext.front().fDirectContext);
+ promiseImageHelper.uploadAllToGPU(nullptr, mainContext->fDirectContext);
- mainContext.front().fTestContext->makeNotCurrent();
+ mainContext->fTestContext->makeNotCurrent();
- std::unique_ptr<GrThreadPool> fGPUExecutor(GrThreadPool::MakeFIFOThreadPool(1,
- mainContext,
- false));
- std::unique_ptr<GrThreadPool> fRecordingExecutor(GrThreadPool::MakeFIFOThreadPool(
- FLAGS_ddlNumRecordingThreads,
- utilityContexts,
- false));
- GrTaskGroup gpuTaskGroup(*fGPUExecutor);
- GrTaskGroup recordingTaskGroup(*fRecordingExecutor);
+ {
+ std::unique_ptr<GrThreadPool> fGPUExecutor(GrThreadPool::MakeFIFOThreadPool(
+ SkSpan<ThreadInfo>(mainContext.get(), 1),
+ false));
+ std::unique_ptr<GrThreadPool> fRecordingExecutor(GrThreadPool::MakeFIFOThreadPool(
+ SkSpan<ThreadInfo>(utilityContexts.get(),
+ FLAGS_ddlNumRecordingThreads),
+ false));
+ GrTaskGroup gpuTaskGroup(*fGPUExecutor);
+ GrTaskGroup recordingTaskGroup(*fRecordingExecutor);
- for (int i = 0; i < FLAGS_ddlNumRecordingThreads; ++i) {
- recordingTaskGroup.add([] {
- ThreadInfo* threadLocal = get_thread_local_info();
- printf("%s: dContext %p\n", threadLocal->fName,
- threadLocal->fDirectContext);
- std::this_thread::sleep_for(std::chrono::seconds(1));
- });
+ for (int i = 0; i < FLAGS_ddlNumRecordingThreads; ++i) {
+ recordingTaskGroup.add([] {
+ ThreadInfo* threadLocal = get_thread_local_info();
+ printf("%s: dContext %p\n", threadLocal->fName.c_str(),
+ threadLocal->fDirectContext);
+ std::this_thread::sleep_for(std::chrono::seconds(1));
+ });
+ }
+
+ gpuTaskGroup.add([] {
+ ThreadInfo* threadLocal = get_thread_local_info();
+ printf("%s: dContext %p\n", threadLocal->fName.c_str(),
+ threadLocal->fDirectContext);
+ });
+
+ gpuTaskGroup.add([] {
+ ThreadInfo* threadLocal = get_thread_local_info();
+ threadLocal->fTestContext->makeNotCurrent();
+ });
+
+ recordingTaskGroup.wait();
+ gpuTaskGroup.wait();
}
- gpuTaskGroup.add([] {
- ThreadInfo* threadLocal = get_thread_local_info();
- printf("%s: dContext %p\n", threadLocal->fName,
- threadLocal->fDirectContext);
- });
+ mainContext->fTestContext->makeCurrent();
- gpuTaskGroup.add([testCtx = mainContext.front().fTestContext] {
- ThreadInfo* threadLocal = get_thread_local_info();
- threadLocal->fTestContext->makeNotCurrent();
- });
-
- recordingTaskGroup.wait();
- gpuTaskGroup.wait();
-
- mainContext.front().fTestContext->makeCurrent();
+ mainContext->dump();
+ for (int i = 0; i < FLAGS_ddlNumRecordingThreads; ++i) {
+ utilityContexts[i].dump();
+ }
return 0;
}