little steps forward
- implement splat and store32, which lets us handle the conceptually simplest
program, a 32-bit memset.
- dump bitcode to /tmp/<debug_name>.bc on success
Kind of starting to look good!
$ ninja -C out dm && out/dm -m SkVM_Pointless
$ opt --O1 /tmp/skvm-jit-211960346.bc | llvm-dis
; Function Attrs: nofree norecurse nounwind writeonly
define void @skvm-jit-211960346(i64, i8* nocapture) local_unnamed_addr #0 {
enter:
%2 = icmp ugt i64 %0, 7
br i1 %2, label %loopK, label %test1.preheader
test1.preheader: ; preds = %loopK, %enter
%.07.lcssa = phi i64 [ %0, %enter ], [ %5, %loopK ]
%.0.lcssa = phi i8* [ %1, %enter ], [ %6, %loopK ]
%3 = icmp eq i64 %.07.lcssa, 0
br i1 %3, label %leave, label %loop1
loopK: ; preds = %enter, %loopK
%.012 = phi i8* [ %6, %loopK ], [ %1, %enter ]
%.0711 = phi i64 [ %5, %loopK ], [ %0, %enter ]
%4 = bitcast i8* %.012 to <8 x i32>*
store <8 x i32> <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>, <8 x i32>* %4, align 1
%5 = add i64 %.0711, -8
%6 = getelementptr i8, i8* %.012, i64 32
%7 = icmp ugt i64 %5, 7
br i1 %7, label %loopK, label %test1.preheader
loop1: ; preds = %test1.preheader, %loop1
%.110 = phi i8* [ %10, %loop1 ], [ %.0.lcssa, %test1.preheader ]
%.189 = phi i64 [ %9, %loop1 ], [ %.07.lcssa, %test1.preheader ]
%8 = bitcast i8* %.110 to i32*
store i32 42, i32* %8, align 1
%9 = add i64 %.189, -1
%10 = getelementptr i8, i8* %.110, i64 4
%11 = icmp eq i64 %9, 0
br i1 %11, label %leave, label %loop1
leave: ; preds = %loop1, %test1.preheader
ret void
}
attributes #0 = { nofree norecurse nounwind writeonly }
Change-Id: I00953c1113739a9ee094cb6cb3c99f1b7f8de9bf
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/273509
Reviewed-by: Herb Derby <herb@google.com>
diff --git a/src/core/SkVM.cpp b/src/core/SkVM.cpp
index 8d24258..a8b3829 100644
--- a/src/core/SkVM.cpp
+++ b/src/core/SkVM.cpp
@@ -17,6 +17,7 @@
#include "src/core/SkVM.h"
#if defined(SKVM_LLVM)
+ #include <llvm/Bitcode/BitcodeWriter.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/Verifier.h>
#endif
@@ -1895,13 +1896,19 @@
// Smallest program:
// b.store32(b.varying<int>(), b.splat(42));
static bool try_llvm(const std::vector<OptimizedInstruction>& instructions,
- const std::vector<int>& strides) {
+ const std::vector<int>& strides,
+ const char* debug_name) {
llvm::LLVMContext ctx;
llvm::Module mod("", ctx);
// All the scary bare pointers from here on are owned by ctx or mod, I think.
- llvm::IntegerType* i64 = llvm::Type::getInt64Ty(ctx);
- llvm::Type* ptr = llvm::Type::getInt8Ty(ctx)->getPointerTo();
+ const int K = 8; // Primary vector width.
+ llvm::Type *ptr = llvm::Type::getInt8Ty(ctx)->getPointerTo();
+ //llvm::Type *f32 = llvm::Type::getFloatTy(ctx);
+ llvm::IntegerType *i32 = llvm::Type::getInt32Ty(ctx),
+ *i64 = llvm::Type::getInt64Ty(ctx);
+ //llvm::VectorType *I32 = llvm::VectorType::get(i32, K),
+ // *F32 = llvm::VectorType::get(f32, K);
std::vector<llvm::Type*> arg_types = { i64 };
for (size_t i = 0; i < strides.size(); i++) {
@@ -1911,7 +1918,7 @@
llvm::FunctionType* fn_type = llvm::FunctionType::get(llvm::Type::getVoidTy(ctx),
arg_types, /*vararg?=*/false);
llvm::Function* fn
- = llvm::Function::Create(fn_type, llvm::GlobalValue::ExternalLinkage, "", mod);
+ = llvm::Function::Create(fn_type, llvm::GlobalValue::ExternalLinkage, debug_name, mod);
llvm::BasicBlock *enter = llvm::BasicBlock::Create(ctx, "enter", fn),
*testK = llvm::BasicBlock::Create(ctx, "testK", fn),
@@ -1922,17 +1929,39 @@
using IRBuilder = llvm::IRBuilder<>;
+ llvm::Value* n;
+ std::vector<llvm::Value*> args;
+ std::vector<llvm::Value*> vals(instructions.size());
+
auto emit = [&](size_t i, bool scalar, IRBuilder* b) {
- const OptimizedInstruction& insn = instructions[i];
- switch (insn.op) {
- default: return false;
+ auto [op, x,y,z, immy,immz, death,can_hoist,used_in_loop] = instructions[i];
+ switch (op) {
+ default:
+ SkDebugf("can't llvm %s (%d)\n", name(op), op);
+ return false;
+
+ case Op::store32: {
+ llvm::Value* v = vals[x];
+ if (scalar) {
+ v = b->CreateExtractElement(v, (uint64_t)0);
+ }
+ llvm::Value* ptr = b->CreateBitCast(b->CreateLoad(args[immy]),
+ v->getType()->getPointerTo());
+ vals[i] = b->CreateAlignedStore(v, ptr, 1);
+ } break;
+
+ // Ops below this line shouldn't need to consider `scalar`... they're Just Math.
+
+ case Op::splat:
+ vals[i] = llvm::ConstantVector::getSplat(K, llvm::ConstantInt::get(i32, immy));
+ break;
+
}
return true;
};
- // enter: set up stack homes for N and each pointer arg
- llvm::Value* n;
- std::vector<llvm::Value*> args;
+ // enter: set up stack homes `n` and `args` for loop counter and uniform/varying pointers.
+ // TODO: manual PHI nodes for these instead of relying on load/store and mem2reg
{
IRBuilder b(enter);
@@ -1949,14 +1978,13 @@
}
// testK: if (N >= K) goto loopK; else goto test1;
- const int K = 8;
llvm::ConstantInt* i64_K = llvm::ConstantInt::get(i64, K);
{
IRBuilder b(testK);
b.CreateCondBr(b.CreateICmpUGE(b.CreateLoad(n), i64_K), loopK, test1);
}
- // loopK: ... insns on K x T vectors; N -= K, args += K*stride; goto testK;
+ // loopK: ... insts on K x T vectors; N -= K, args += K*stride; goto testK;
{
IRBuilder b(loopK);
for (size_t i = 0; i < instructions.size(); i++) {
@@ -1979,7 +2007,7 @@
b.CreateCondBr(b.CreateICmpUGE(b.CreateLoad(n), i64_1), loop1, leave);
}
- // loop1: ... insns on scalars; N -= 1, args += stride; goto test1;
+ // loop1: ... insts on scalars; N -= 1, args += stride; goto test1;
{
IRBuilder b(loop1);
for (size_t i = 0; i < instructions.size(); i++) {
@@ -2002,6 +2030,14 @@
}
SkASSERT(false == llvm::verifyModule(mod));
+
+ SkString path = SkStringPrintf("/tmp/%s.bc", debug_name);
+ std::error_code err;
+ llvm::raw_fd_ostream os(path.c_str(), err);
+ if (err) {
+ return false;
+ }
+ llvm::WriteBitcodeToFile(mod, os);
return true;
}
#endif
@@ -2056,13 +2092,6 @@
Program::Program(const std::vector<OptimizedInstruction>& interpreter,
const std::vector<int>& strides) : fStrides(strides) {
this->setupInterpreter(interpreter);
- #if defined(SKVM_LLVM)
- if (try_llvm(interpreter, fStrides)) {
- SkDebugf("hey, neat! that might work\n");
- } else {
- SkDebugf("bummer\n");
- }
- #endif
}
Program::Program(const std::vector<OptimizedInstruction>& interpreter,
@@ -2072,6 +2101,14 @@
#if 1 && defined(SKVM_JIT)
this->setupJIT(jit, debug_name);
#endif
+
+ #if defined(SKVM_LLVM)
+ if (try_llvm(interpreter, fStrides, debug_name)) {
+ SkDebugf("hey, neat! that might work\n");
+ } else {
+ SkDebugf("bummer\n");
+ }
+ #endif
}
// Translate OptimizedInstructions to Program::Instructions used by the interpreter.