Have std/tga decode BGRX5551 images

Updates #67
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 5f6ae9a..a2004bb 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -9819,6 +9819,7 @@
   struct {
     uint8_t f_dst_palette[1024];
     uint8_t f_src_palette[1024];
+    uint8_t f_scratch[4];
 
     struct {
       uint32_t v_i;
@@ -9828,7 +9829,6 @@
       uint64_t v_dst_bytes_per_pixel;
       uint32_t v_dst_x;
       uint32_t v_dst_y;
-      uint8_t v_src[4];
       uint64_t v_mark;
       uint32_t v_num_pixels32;
       uint32_t v_lit_length;
@@ -42777,13 +42777,14 @@
   wuffs_base__slice_u8 v_dst = {0};
   uint64_t v_dst_start = 0;
   wuffs_base__slice_u8 v_src_palette = {0};
-  uint8_t v_src[4] = {0};
   uint64_t v_mark = 0;
   uint64_t v_num_pixels64 = 0;
   uint32_t v_num_pixels32 = 0;
   uint32_t v_lit_length = 0;
   uint64_t v_num_dst_bytes = 0;
   uint32_t v_num_src_bytes = 0;
+  uint32_t v_c = 0;
+  uint32_t v_c5 = 0;
 
   const uint8_t* iop_a_src = NULL;
   const uint8_t* io0_a_src WUFFS_BASE__POTENTIALLY_UNUSED = NULL;
@@ -42801,7 +42802,6 @@
     v_dst_bytes_per_pixel = self->private_data.s_decode_frame[0].v_dst_bytes_per_pixel;
     v_dst_x = self->private_data.s_decode_frame[0].v_dst_x;
     v_dst_y = self->private_data.s_decode_frame[0].v_dst_y;
-    memcpy(v_src, self->private_data.s_decode_frame[0].v_src, sizeof(v_src));
     v_mark = self->private_data.s_decode_frame[0].v_mark;
     v_num_pixels32 = self->private_data.s_decode_frame[0].v_num_pixels32;
     v_lit_length = self->private_data.s_decode_frame[0].v_lit_length;
@@ -42902,11 +42902,21 @@
             }
           } else {
             if (v_lit_length > 0) {
-              v_src[0] = 127;
-              v_src[1] = 0;
-              v_src[2] = 255;
-              v_src[3] = 255;
-              wuffs_base__pixel_swizzler__swizzle_interleaved_from_slice(&self->private_impl.f_swizzler, v_dst, wuffs_base__pixel_buffer__palette(a_dst), wuffs_base__make_slice_u8(v_src, 4));
+              if (((uint64_t)(io2_a_src - iop_a_src)) < 2) {
+                status = wuffs_base__make_status(wuffs_base__suspension__short_read);
+                WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(4);
+                goto label__resume__continue;
+              }
+              v_c = ((uint32_t)(wuffs_base__peek_u16le__no_bounds_check(iop_a_src)));
+              iop_a_src += 2;
+              v_c5 = (31 & (v_c >> 0));
+              self->private_data.f_scratch[0] = ((uint8_t)(((v_c5 << 3) | (v_c5 >> 2))));
+              v_c5 = (31 & (v_c >> 5));
+              self->private_data.f_scratch[1] = ((uint8_t)(((v_c5 << 3) | (v_c5 >> 2))));
+              v_c5 = (31 & (v_c >> 10));
+              self->private_data.f_scratch[2] = ((uint8_t)(((v_c5 << 3) | (v_c5 >> 2))));
+              self->private_data.f_scratch[3] = 255;
+              wuffs_base__pixel_swizzler__swizzle_interleaved_from_slice(&self->private_impl.f_swizzler, v_dst, v_dst_palette, wuffs_base__make_slice_u8(self->private_data.f_scratch, 4));
               if (v_dst_bytes_per_pixel <= ((uint64_t)(v_dst.len))) {
                 v_dst = wuffs_base__slice_u8__subslice_i(v_dst, v_dst_bytes_per_pixel);
               }
@@ -42940,7 +42950,6 @@
   self->private_data.s_decode_frame[0].v_dst_bytes_per_pixel = v_dst_bytes_per_pixel;
   self->private_data.s_decode_frame[0].v_dst_x = v_dst_x;
   self->private_data.s_decode_frame[0].v_dst_y = v_dst_y;
-  memcpy(self->private_data.s_decode_frame[0].v_src, v_src, sizeof(v_src));
   self->private_data.s_decode_frame[0].v_mark = v_mark;
   self->private_data.s_decode_frame[0].v_num_pixels32 = v_num_pixels32;
   self->private_data.s_decode_frame[0].v_lit_length = v_lit_length;
diff --git a/std/tga/decode_tga.wuffs b/std/tga/decode_tga.wuffs
index e01702d..fbeb585 100644
--- a/std/tga/decode_tga.wuffs
+++ b/std/tga/decode_tga.wuffs
@@ -71,6 +71,7 @@
 )(
 	dst_palette : array[4 * 256] base.u8,
 	src_palette : array[4 * 256] base.u8,
+	scratch     : array[4] base.u8,
 )
 
 pub func decoder.set_quirk_enabled!(quirk: base.u32, enabled: base.bool) {
@@ -158,7 +159,6 @@
 
 	} else if (this.header_image_type | 8) == 0x0A {
 		if this.header_pixel_depth == 0x10 {
-			// TODO: implement.
 			this.src_pixfmt = base.PIXEL_FORMAT__BGRA_NONPREMUL
 		} else if this.header_pixel_depth == 0x18 {
 			this.src_bytes_per_pixel = 3
@@ -215,7 +215,7 @@
 				this.src_palette[((i & 0xFF) * 4) + 1] = ((c5 << 3) | (c5 >> 2)) as base.u8
 				c5 = 0x1F & (c >> 10)
 				this.src_palette[((i & 0xFF) * 4) + 2] = ((c5 << 3) | (c5 >> 2)) as base.u8
-				// TODO: can the alpha value be zero (BGRA5551)?
+				// TODO: bit 15 is ignored (BGRX5551); should it be alpha (BGRA5551)?
 				this.src_palette[((i & 0xFF) * 4) + 3] = 0xFF
 			}
 			i += 1
@@ -288,13 +288,14 @@
 	var dst                 : slice base.u8
 	var dst_start           : base.u64
 	var src_palette         : slice base.u8
-	var src                 : array[4] base.u8
 	var mark                : base.u64
 	var num_pixels64        : base.u64
 	var num_pixels32        : base.u32[..= 0xFFFF]
 	var lit_length          : base.u32[..= 0xFFFF]
 	var num_dst_bytes       : base.u64[..= 0x1F_FFE0]
 	var num_src_bytes       : base.u32[..= 0x3_FFFC]
+	var c                   : base.u32
+	var c5                  : base.u32[..= 0x1F]
 
 	if this.call_sequence < 4 {
 		this.decode_frame_config?(dst: nullptr, src: args.src)
@@ -373,15 +374,28 @@
 					}
 
 				} else {
+					// Wuffs' base.pixel_swizzler doesn't support BGRX5551, so
+					// we manually convert to BGRX8888, one pixel at a time.
+
 					if lit_length > 0 {
-						// TODO: don't hard-code reddish purple.
-						src[0] = 0x7F
-						src[1] = 0x00
-						src[2] = 0xFF
-						src[3] = 0xFF
-						// TODO: be more efficient than swizzling one pixel at a time.
+						if args.src.length() < 2 {
+							yield? base."$short read"
+							continue.resume
+						}
+						c = args.src.peek_u16le_as_u32()
+						args.src.skip_u32_fast!(actual: 2, worst_case: 2)
+						c5 = 0x1F & (c >> 0)
+						this.scratch[0] = ((c5 << 3) | (c5 >> 2)) as base.u8
+						c5 = 0x1F & (c >> 5)
+						this.scratch[1] = ((c5 << 3) | (c5 >> 2)) as base.u8
+						c5 = 0x1F & (c >> 10)
+						this.scratch[2] = ((c5 << 3) | (c5 >> 2)) as base.u8
+						// TODO: bit 15 is ignored (BGRX5551); should it be alpha (BGRA5551)?
+						this.scratch[3] = 0xFF
 						this.swizzler.swizzle_interleaved_from_slice!(
-							dst: dst, dst_palette: args.dst.palette(), src: src[..])
+							dst: dst,
+							dst_palette: dst_palette,
+							src: this.scratch[.. 4])
 						if dst_bytes_per_pixel <= dst.length() {
 							dst = dst[dst_bytes_per_pixel ..]
 						}
diff --git a/test/3pdata/nia-checksums-of-tgasuite.txt b/test/3pdata/nia-checksums-of-tgasuite.txt
index e60c6c1..a8fa186 100644
--- a/test/3pdata/nia-checksums-of-tgasuite.txt
+++ b/test/3pdata/nia-checksums-of-tgasuite.txt
@@ -1,6 +1,6 @@
 # Generated by script/print-nia-checksums.sh
 9fbe866a test/3pdata/tgasuite/ubw8.tga
 5d75ef41 test/3pdata/tgasuite/ucm8.tga
-3da6493d test/3pdata/tgasuite/utc16.tga
+5d75ef41 test/3pdata/tgasuite/utc16.tga
 5d75ef41 test/3pdata/tgasuite/utc24.tga
 79781cfb test/3pdata/tgasuite/utc32.tga