fix bug for add/sub with r8-r15

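I had been setting the REX.R bit to select the high registers (r8-r15),
but the bit that actually needs to be set is REX.B.  In these add/sub
immediate forms dst is encoded in the ModRM r/m field, which REX.B
extends; REX.R extends the ModRM reg field, which here only holds the
opcode extension.  Don't know how I got that wrong before... the leading
byte should be 49 (REX.W+B), not 4c (REX.W+R).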

    $ cat test.s
    foo:
        addq $7, %r8

    $ clang -c test.s && objdump -d test.o

    0000000000000000 <foo>:
       0:	49 83 c0 07          	add    $0x7,%r8

Change-Id: I039e1c4f4ea20523a1e2cc9bcf5f6d9321a6223b
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/227177
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
diff --git a/src/core/SkVM.cpp b/src/core/SkVM.cpp
index d3f5bce..557bbe8 100644
--- a/src/core/SkVM.cpp
+++ b/src/core/SkVM.cpp
@@ -420,7 +420,7 @@
             opcode |= 0b0000'0010;  // second bit set for 8-bit immediate, else 32-bit.
         }
 
-        this->byte(rex(1,dst>>3,0,0));
+        this->byte(rex(1,0,0,dst>>3));
         this->byte(opcode);
         this->byte(mod_rm(Mod::Direct, opcode_ext, dst&7));
         this->byte(&imm, imm_bytes);
diff --git a/tests/SkVMTest.cpp b/tests/SkVMTest.cpp
index ecb65dd..4bf52e9 100644
--- a/tests/SkVMTest.cpp
+++ b/tests/SkVMTest.cpp
@@ -415,7 +415,7 @@
         a.add(A::rdi, 12);      // Last 0x48 REX
         a.sub(A::rdi, 8);
 
-        a.add(A::r8 , 7);       // First 0x4c REX
+        a.add(A::r8 , 7);       // First 0x49 REX
         a.sub(A::r8 , 4);
 
         a.add(A::rsi, 128);     // Requires 4 byte immediate.
@@ -427,11 +427,11 @@
         0x48, 0x83, 0b11'000'111, 0x0c,
         0x48, 0x83, 0b11'101'111, 0x08,
 
-        0x4c, 0x83, 0b11'000'000, 0x07,
-        0x4c, 0x83, 0b11'101'000, 0x04,
+        0x49, 0x83, 0b11'000'000, 0x07,
+        0x49, 0x83, 0b11'101'000, 0x04,
 
         0x48, 0x81, 0b11'000'110, 0x80, 0x00, 0x00, 0x00,
-        0x4c, 0x81, 0b11'101'000, 0x40, 0x42, 0x0f, 0x00,
+        0x49, 0x81, 0b11'101'000, 0x40, 0x42, 0x0f, 0x00,
     });