Add tests for tail handling in SkJumper.

Change-Id: Ib4ecc33dc9552c16b5530359cd3649487e70bbed
Reviewed-on: https://skia-review.googlesource.com/18067
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Herb Derby <herb@google.com>
diff --git a/tests/SkRasterPipelineTest.cpp b/tests/SkRasterPipelineTest.cpp
index da37aa2..63c5c89 100644
--- a/tests/SkRasterPipelineTest.cpp
+++ b/tests/SkRasterPipelineTest.cpp
@@ -79,3 +79,126 @@
         }
     }
 }
+
+static uint16_t h(float f) {  // Test helper: float -> 1-5-10 half-float bit pattern.
+    // Remember, a float is 1-8-23 (sign-exponent-mantissa) with 127 exponent bias.
+    uint32_t sem;
+    memcpy(&sem, &f, sizeof(sem));
+    uint32_t s  = sem & 0x80000000,
+             em = sem ^ s;
+
+    // Convert to 1-5-10 half with 15 bias, flushing denorm halfs (including zero) to zero.
+    // A signed compare is often quicker, and always safe here: em has its sign bit cleared.
+    auto denorm = (int32_t)em < 0x38800000;
+    return denorm ? SkTo<uint16_t>(0)
+                  : SkTo<uint16_t>((s>>16) + (em>>13) - ((127-15)<<10));
+}
+
+static uint16_t n(uint16_t x) {  // Test helper: swap the high and low bytes of x.
+    return (x<<8) | (x>>8);
+}
+
+static float a(uint16_t x) {  // Test helper: x scaled by 1/65535, as a float.
+    return (1/65535.0f) * x;
+}
+
+DEF_TEST(SkRasterPipeline_tail, r) {  // Short runs must leave the rest of the buffer untouched.
+    {  // Case 1: load_f32 -> store_f32, expecting a byte-exact copy of the processed rows.
+        float data[][4] = {
+            {00, 01, 02, 03},  // Leading-zero literals are octal, but all are < 8: values 0..3.
+            {10, 11, 12, 13},
+            {20, 21, 22, 23},
+            {30, 31, 32, 33},
+        };
+
+        float buffer[4][4];
+        float* src = &data[0][0];
+        float* dst = &buffer[0][0];
+
+        for (unsigned i = 0; i < 4; i++) {
+            memset(buffer, 0xff, sizeof(buffer));  // All-ones bytes: every float reads as NaN.
+            SkRasterPipeline_<256> p;
+            p.append(SkRasterPipeline::load_f32, &src);
+            p.append(SkRasterPipeline::store_f32, &dst);
+            p.run(0, i % 4);  // i < 4, so i % 4 == i. Presumably run(x, n) -- TODO confirm.
+            for (unsigned j = 0; j < i; j++) {  // Rows [0, i) must match the source exactly.
+                REPORTER_ASSERT(r,
+                                !memcmp(&data[j][0], &buffer[j][0], sizeof(buffer[j])));
+            }
+            for (int j = i; j < 4; j++) {  // Rows [i, 4) must still hold the NaN sentinel.
+                for (auto f : buffer[j]) {
+                    REPORTER_ASSERT(r, SkScalarIsNaN(f));
+                }
+            }
+        }
+    }
+
+    {  // Case 2: load_f16 -> store_f16 over half-float bit patterns built with h().
+        uint16_t data[][4] = {
+            {h(00), h(01), h(02), h(03)},
+            {h(10), h(11), h(12), h(13)},
+            {h(20), h(21), h(22), h(23)},
+            {h(30), h(31), h(32), h(33)},
+        };
+        uint16_t buffer[4][4];
+        uint16_t* src = &data[0][0];
+        uint16_t* dst = &buffer[0][0];
+
+        for (unsigned i = 0; i < 4; i++) {
+            memset(buffer, 0xff, sizeof(buffer));  // Sentinel: every uint16_t becomes 0xffff.
+            SkRasterPipeline_<256> p;
+            p.append(SkRasterPipeline::load_f16, &src);
+            p.append(SkRasterPipeline::store_f16, &dst);
+            p.run(0, i % 4);  // i < 4, so i % 4 == i.
+            for (unsigned j = 0; j < i; j++) {  // Rows [0, i) must match the source exactly.
+                REPORTER_ASSERT(r,
+                                !memcmp(&data[j][0], &buffer[j][0], sizeof(buffer[j])));
+            }
+            for (int j = i; j < 4; j++) {  // Rows [i, 4) must still hold the 0xffff sentinel.
+                for (auto f : buffer[j]) {
+                    REPORTER_ASSERT(r, f == 0xffff);
+                }
+            }
+        }
+    }
+
+    {  // Case 3: load_rgb_u16_be -> store_f32; three u16 inputs per row, four floats out.
+        uint16_t data[][3] = {
+            {n(00), n(01), n(02)},  // n() byte-swaps each value before it is handed to the load.
+            {n(10), n(11), n(12)},
+            {n(20), n(21), n(22)},
+            {n(30), n(31), n(32)}
+        };
+
+        float answer[][4] = {  // Expected rows: a() of the unswapped value, 4th channel 1.0f.
+            {a(00), a(01), a(02), 1.0f},
+            {a(10), a(11), a(12), 1.0f},
+            {a(20), a(21), a(22), 1.0f},
+            {a(30), a(31), a(32), 1.0f}
+        };
+
+        float buffer[4][4];
+        uint16_t* src = &data[0][0];
+        float* dst = &buffer[0][0];
+
+        for (unsigned i = 0; i < 4; i++) {
+            memset(buffer, 0xff, sizeof(buffer));  // All-ones bytes: every float reads as NaN.
+            SkRasterPipeline_<256> p;
+            p.append(SkRasterPipeline::load_rgb_u16_be, &src);
+            p.append(SkRasterPipeline::store_f32, &dst);
+            p.run(0, i % 4);  // i < 4, so i % 4 == i.
+            for (unsigned j = 0; j < i; j++) {  // Rows [0, i) are compared per channel.
+                for (unsigned k = 0; k < 4; k++) {
+                    if (buffer[j][k] != answer[j][k]) {  // Exact float equality is required.
+                        ERRORF(r, "(%u, %u) - a: %g r: %g\n", j, k, answer[j][k], buffer[j][k]);
+                    }
+                }
+            }
+            for (int j = i; j < 4; j++) {  // Rows [i, 4) must still hold the NaN sentinel.
+                for (auto f : buffer[j]) {
+                    REPORTER_ASSERT(r, SkScalarIsNaN(f));
+                }
+            }
+        }
+    }
+}