std/jpeg: decode color images, not just gray

Updates #42
diff --git a/internal/cgen/base/all-impl.c b/internal/cgen/base/all-impl.c
index 34d6278..d58cf56 100644
--- a/internal/cgen/base/all-impl.c
+++ b/internal/cgen/base/all-impl.c
@@ -162,7 +162,9 @@
 #if !defined(WUFFS_CONFIG__MODULES) || defined(WUFFS_CONFIG__MODULE__BASE) || \
     defined(WUFFS_CONFIG__MODULE__BASE__PIXCONV)
 
-// ¡ INSERT base/pixconv-submodule.c.
+// ¡ INSERT base/pixconv-submodule-regular.c.
+
+// ¡ INSERT base/pixconv-submodule-ycck.c.
 
 #endif  // !defined(WUFFS_CONFIG__MODULES) ||
         // defined(WUFFS_CONFIG__MODULE__BASE) ||
diff --git a/internal/cgen/base/image-private.h b/internal/cgen/base/image-private.h
index 1cb8a67..7d3f01d 100644
--- a/internal/cgen/base/image-private.h
+++ b/internal/cgen/base/image-private.h
@@ -38,6 +38,38 @@
     wuffs_base__slice_u8 dst_palette,
     uint64_t num_pixels);
 
+WUFFS_BASE__MAYBE_STATIC wuffs_base__status  //
+wuffs_base__pixel_swizzler__swizzle_ycck(const wuffs_base__pixel_swizzler* p,
+                                         wuffs_base__pixel_buffer* dst,
+                                         wuffs_base__slice_u8 dst_palette,
+                                         uint32_t width,
+                                         uint32_t height,
+                                         wuffs_base__slice_u8 src0,
+                                         wuffs_base__slice_u8 src1,
+                                         wuffs_base__slice_u8 src2,
+                                         wuffs_base__slice_u8 src3,
+                                         uint32_t width0,
+                                         uint32_t width1,
+                                         uint32_t width2,
+                                         uint32_t width3,
+                                         uint32_t height0,
+                                         uint32_t height1,
+                                         uint32_t height2,
+                                         uint32_t height3,
+                                         uint32_t stride0,
+                                         uint32_t stride1,
+                                         uint32_t stride2,
+                                         uint32_t stride3,
+                                         uint8_t h0,
+                                         uint8_t h1,
+                                         uint8_t h2,
+                                         uint8_t h3,
+                                         uint8_t v0,
+                                         uint8_t v1,
+                                         uint8_t v2,
+                                         uint8_t v3,
+                                         bool triangle_filter_for_2to1);
+
 // ---------------- Images (Utility)
 
 #define wuffs_base__utility__make_pixel_format wuffs_base__make_pixel_format
diff --git a/internal/cgen/base/image-public.h b/internal/cgen/base/image-public.h
index 5f1e943..a0e632e 100644
--- a/internal/cgen/base/image-public.h
+++ b/internal/cgen/base/image-public.h
@@ -201,6 +201,58 @@
   return (a << 24) | (r << 16) | (g << 8) | (b << 0);
 }
 
+// wuffs_base__color_ycc__as__color_u32 converts from YCbCr to 0xAARRGGBB. The
+// alpha bits are always 0xFF.
+static inline wuffs_base__color_u32_argb_premul  //
+wuffs_base__color_ycc__as__color_u32(uint8_t yy, uint8_t cb, uint8_t cr) {
+  // Work in 16.16 fixed point arithmetic (so that 'one half' is (1 << 15)) and
+  // bias the chroma values by 0x80.
+  uint32_t yy32 = (((uint32_t)yy) << 16) | (1 << 15);
+  uint32_t cb32 = (((uint32_t)cb) - 0x80);
+  uint32_t cr32 = (((uint32_t)cr) - 0x80);
+
+  // The formulae:
+  //
+  //  R = Y                + 1.40200 * Cr
+  //  G = Y - 0.34414 * Cb - 0.71414 * Cr
+  //  B = Y + 1.77200 * Cb
+  //
+  // When scaled by 1<<16:
+  //
+  //  0.34414 becomes 0x0581A =  22554.
+  //  0.71414 becomes 0x0B6D2 =  46802.
+  //  1.40200 becomes 0x166E9 =  91881.
+  //  1.77200 becomes 0x1C5A2 = 116130.
+  //
+  // Since we're working in 16.16 fixed point arithmetic, masking by 0x00FF0000
+  // (possibly followed by a shift) gives the relevant 8 bits per channel.
+  //
+  // However, we need to saturate for overflow (above 0x00FFFFFF, but not so
+  // high that the MSB (Most Significant Bit) is set) or for underflow (below
+  // 0x00000000 as int32_t, which means that the MSB is set as uint32_t). In
+  // both cases, some of the high 8 bits (bits 24 ..= 31) will be set.
+  //
+  // "((uint32_t)(((int32_t)x) >> 31))" just replicates x's MSB across all 32
+  // bits. Prepending that with "~" inverts those bits. Thus, "~(etc)" is
+  // either 0xFFFFFFFF (for overflow) or 0x00000000 (for underflow).
+  uint32_t rr32 = yy32 + (0x166E9 * cr32);
+  uint32_t gg32 = yy32 - (0x0581A * cb32) - (0x0B6D2 * cr32);
+  uint32_t bb32 = yy32 + (0x1C5A2 * cb32);
+  if (rr32 >> 24) {
+    rr32 = ~((uint32_t)(((int32_t)rr32) >> 31));
+  }
+  if (gg32 >> 24) {
+    gg32 = ~((uint32_t)(((int32_t)gg32) >> 31));
+  }
+  if (bb32 >> 24) {
+    bb32 = ~((uint32_t)(((int32_t)bb32) >> 31));
+  }
+  return 0xFF000000 |                  //
+         ((0x00FF0000 & rr32) >> 0) |  //
+         ((0x00FF0000 & gg32) >> 8) |  //
+         ((0x00FF0000 & bb32) >> 16);
+}
+
 // --------
 
 typedef uint8_t wuffs_base__pixel_blend;
diff --git a/internal/cgen/base/pixconv-submodule.c b/internal/cgen/base/pixconv-submodule-regular.c
similarity index 100%
rename from internal/cgen/base/pixconv-submodule.c
rename to internal/cgen/base/pixconv-submodule-regular.c
diff --git a/internal/cgen/base/pixconv-submodule-ycck.c b/internal/cgen/base/pixconv-submodule-ycck.c
new file mode 100644
index 0000000..6705692
--- /dev/null
+++ b/internal/cgen/base/pixconv-submodule-ycck.c
@@ -0,0 +1,264 @@
+// Copyright 2023 The Wuffs Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// --------
+
+static inline uint32_t  //
+wuffs_base__u32__max_of_4(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  return wuffs_base__u32__max(     //
+      wuffs_base__u32__max(a, b),  //
+      wuffs_base__u32__max(c, d));
+}
+
+static inline uint32_t  //
+wuffs_base__u32__min_of_5(uint32_t a,
+                          uint32_t b,
+                          uint32_t c,
+                          uint32_t d,
+                          uint32_t e) {
+  return wuffs_base__u32__min(          //
+      wuffs_base__u32__min(             //
+          wuffs_base__u32__min(a, b),   //
+          wuffs_base__u32__min(c, d)),  //
+      e);
+}
+
+// Writes one dst pixel per (x, y) position, advancing each src plane at its
+// own *_out_of_12 rate. Preconditions: see all the checks made in
+// wuffs_base__pixel_swizzler__swizzle_ycck before calling, e.g. (width > 0).
+static void  //
+wuffs_base__pixel_swizzler__swizzle_ycc__general__box_filter(
+    const wuffs_base__pixel_swizzler* p,
+    wuffs_base__pixel_buffer* dst,
+    wuffs_base__slice_u8 dst_palette,
+    uint32_t width,
+    uint32_t height,
+    const uint8_t* src_ptr0,
+    const uint8_t* src_ptr1,
+    const uint8_t* src_ptr2,
+    uint32_t stride0,
+    uint32_t stride1,
+    uint32_t stride2,
+    uint32_t h0_out_of_12,
+    uint32_t h1_out_of_12,
+    uint32_t h2_out_of_12,
+    uint32_t v0_out_of_12,
+    uint32_t v1_out_of_12,
+    uint32_t v2_out_of_12) {
+  uint32_t iy0 = 0;  // Per-plane vertical accumulators. Reaching 12 advances
+  uint32_t iy1 = 0;  // that plane's src_ptr by its stride and resets the
+  uint32_t iy2 = 0;  // accumulator to zero.
+  uint32_t y = 0;
+  while (true) {
+    const uint8_t* src_iter0 = src_ptr0;
+    const uint8_t* src_iter1 = src_ptr1;
+    const uint8_t* src_iter2 = src_ptr2;
+
+    uint32_t ix0 = 0;  // Per-plane horizontal accumulators. Reaching 12
+    uint32_t ix1 = 0;  // advances that plane's src_iter by one byte and
+    uint32_t ix2 = 0;  // resets the accumulator to zero.
+    uint32_t x = 0;
+    while (true) {
+      wuffs_base__pixel_buffer__set_color_u32_at(
+          dst, x, y,
+          wuffs_base__color_ycc__as__color_u32(*src_iter0, *src_iter1,
+                                               *src_iter2));
+
+      if ((x + 1) == width) {  // Done: do not step past the final column.
+        break;
+      }
+      x = x + 1;
+      ix0 += h0_out_of_12;
+      if (ix0 >= 12) {
+        ix0 = 0;
+        src_iter0++;
+      }
+      ix1 += h1_out_of_12;
+      if (ix1 >= 12) {
+        ix1 = 0;
+        src_iter1++;
+      }
+      ix2 += h2_out_of_12;
+      if (ix2 >= 12) {
+        ix2 = 0;
+        src_iter2++;
+      }
+    }
+
+    if ((y + 1) == height) {  // Done: do not step past the final row.
+      break;
+    }
+    y = y + 1;
+    iy0 += v0_out_of_12;
+    if (iy0 >= 12) {
+      iy0 = 0;
+      src_ptr0 += stride0;
+    }
+    iy1 += v1_out_of_12;
+    if (iy1 >= 12) {
+      iy1 = 0;
+      src_ptr1 += stride1;
+    }
+    iy2 += v2_out_of_12;
+    if (iy2 >= 12) {
+      iy2 = 0;
+      src_ptr2 += stride2;
+    }
+  }
+}
+
+// wuffs_base__pixel_swizzler__flattened_length is like
+// wuffs_base__table__flattened_length but returns uint64_t (not size_t) and
+// also accounts for subsampling.
+static uint64_t  //
+wuffs_base__pixel_swizzler__flattened_length(uint32_t width,
+                                             uint32_t height,
+                                             uint32_t stride,
+                                             uint32_t inv_h,
+                                             uint32_t inv_v) {
+  uint64_t scaled_width = (((uint64_t)width) + (inv_h - 1)) / inv_h;
+  uint64_t scaled_height = (((uint64_t)height) + (inv_v - 1)) / inv_v;
+  if (scaled_height <= 0) {
+    return 0;
+  }
+  return ((scaled_height - 1) * stride) + scaled_width;
+}
+
+WUFFS_BASE__MAYBE_STATIC wuffs_base__status  // Writes YCbCr planes to dst.
+wuffs_base__pixel_swizzler__swizzle_ycck(const wuffs_base__pixel_swizzler* p,
+                                         wuffs_base__pixel_buffer* dst,
+                                         wuffs_base__slice_u8 dst_palette,
+                                         uint32_t width,
+                                         uint32_t height,
+                                         wuffs_base__slice_u8 src0,
+                                         wuffs_base__slice_u8 src1,
+                                         wuffs_base__slice_u8 src2,
+                                         wuffs_base__slice_u8 src3,
+                                         uint32_t width0,
+                                         uint32_t width1,
+                                         uint32_t width2,
+                                         uint32_t width3,
+                                         uint32_t height0,
+                                         uint32_t height1,
+                                         uint32_t height2,
+                                         uint32_t height3,
+                                         uint32_t stride0,
+                                         uint32_t stride1,
+                                         uint32_t stride2,
+                                         uint32_t stride3,
+                                         uint8_t h0,
+                                         uint8_t h1,
+                                         uint8_t h2,
+                                         uint8_t h3,
+                                         uint8_t v0,
+                                         uint8_t v1,
+                                         uint8_t v2,
+                                         uint8_t v3,
+                                         bool triangle_filter_for_2to1) {
+  if (!p) {
+    return wuffs_base__make_status(wuffs_base__error__bad_receiver);
+  } else if ((h3 != 0) || (v3 != 0) || triangle_filter_for_2to1) {
+    // TODO: support the K in YCCK and support triangle_filter_for_2to1.
+    return wuffs_base__make_status(
+        wuffs_base__error__unsupported_pixel_swizzler_option);
+  } else if (!dst || (width > 0xFFFF) || (height > 0xFFFF) ||  //
+             (h0 < 1) || (h0 > 4) || (v0 < 1) || (v0 > 4) ||   //
+             (h1 < 1) || (h1 > 4) || (v1 < 1) || (v1 > 4) ||   //
+             (h2 < 1) || (h2 > 4) || (v2 < 1) || (v2 > 4)) {  // Rejects 0 too.
+    return wuffs_base__make_status(wuffs_base__error__bad_argument);
+  }
+
+  uint32_t max_incl_h = wuffs_base__u32__max_of_4(h0, h1, h2, h3);
+  uint32_t max_incl_v = wuffs_base__u32__max_of_4(v0, v1, v2, v3);
+  uint32_t inv_h0 = max_incl_h / h0;  // Divisors are non-zero: checked above.
+  uint32_t inv_h1 = max_incl_h / h1;
+  uint32_t inv_h2 = max_incl_h / h2;
+  uint32_t inv_v0 = max_incl_v / v0;
+  uint32_t inv_v1 = max_incl_v / v1;
+  uint32_t inv_v2 = max_incl_v / v2;
+  width = wuffs_base__u32__min_of_5(  //
+      width,                          //
+      width0 * inv_h0,                //
+      width1 * inv_h1,                //
+      width2 * inv_h2,                //
+      wuffs_base__pixel_config__width(&dst->pixcfg));
+  height = wuffs_base__u32__min_of_5(  //
+      height,                          //
+      height0 * inv_v0,                //
+      height1 * inv_v1,                //
+      height2 * inv_v2,                //
+      wuffs_base__pixel_config__height(&dst->pixcfg));
+
+  if (((h0 * inv_h0) != max_incl_h) ||  // Factors must divide the maximum.
+      ((h1 * inv_h1) != max_incl_h) ||  //
+      ((h2 * inv_h2) != max_incl_h) ||  //
+      ((v0 * inv_v0) != max_incl_v) ||  //
+      ((v1 * inv_v1) != max_incl_v) ||  //
+      ((v2 * inv_v2) != max_incl_v) ||  //
+      (src0.len < wuffs_base__pixel_swizzler__flattened_length(
+                      width, height, stride0, inv_h0, inv_v0)) ||
+      (src1.len < wuffs_base__pixel_swizzler__flattened_length(
+                      width, height, stride1, inv_h1, inv_v1)) ||
+      (src2.len < wuffs_base__pixel_swizzler__flattened_length(
+                      width, height, stride2, inv_h2, inv_v2))) {
+    return wuffs_base__make_status(wuffs_base__error__bad_argument);
+  }
+
+  if (wuffs_base__pixel_format__is_planar(&dst->pixcfg.private_impl.pixfmt)) {
+    // TODO: see wuffs_base__pixel_buffer__set_color_u32_at's TODO.
+    return wuffs_base__make_status(
+        wuffs_base__error__unsupported_pixel_swizzler_option);
+  }
+
+  switch (dst->pixcfg.private_impl.pixfmt.repr) {
+    case WUFFS_BASE__PIXEL_FORMAT__Y:
+    case WUFFS_BASE__PIXEL_FORMAT__Y_16LE:
+    case WUFFS_BASE__PIXEL_FORMAT__Y_16BE:
+    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_NONPREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_PREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY:
+    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
+    case WUFFS_BASE__PIXEL_FORMAT__BGR:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRX:
+    case WUFFS_BASE__PIXEL_FORMAT__RGB:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBX:
+      break;
+
+    default:
+      // TODO: see wuffs_base__pixel_buffer__set_color_u32_at's TODO.
+      return wuffs_base__make_status(
+          wuffs_base__error__unsupported_pixel_swizzler_option);
+  }
+
+  if ((width <= 0) || (height <= 0)) {  // Nothing to draw.
+    return wuffs_base__make_status(NULL);
+  }
+
+  wuffs_base__pixel_swizzler__swizzle_ycc__general__box_filter(
+      p, dst, dst_palette, width, height,  //
+      src0.ptr, src1.ptr, src2.ptr,        //
+      stride0, stride1, stride2,           //
+      (h0 * 12) / max_incl_h,              //
+      (h1 * 12) / max_incl_h,              //
+      (h2 * 12) / max_incl_h,              //
+      (v0 * 12) / max_incl_v,              //
+      (v1 * 12) / max_incl_v,              //
+      (v2 * 12) / max_incl_v);
+  return wuffs_base__make_status(NULL);
+}
diff --git a/internal/cgen/cgen.go b/internal/cgen/cgen.go
index dbee298..27e050e 100644
--- a/internal/cgen/cgen.go
+++ b/internal/cgen/cgen.go
@@ -123,16 +123,17 @@
 			}
 			buf := make(buffer, 0, 128*1024)
 			if err := expandBangBangInsert(&buf, embedBaseAllImplC.Trim(), map[string]func(*buffer) error{
-				"// ¡ INSERT InterfaceDeclarations.\n":      insertInterfaceDeclarations,
-				"// ¡ INSERT InterfaceDefinitions.\n":       insertInterfaceDefinitions,
-				"// ¡ INSERT base/all-private.h.\n":         insertBaseAllPrivateH,
-				"// ¡ INSERT base/all-public.h.\n":          insertBaseAllPublicH,
-				"// ¡ INSERT base/copyright\n":              insertBaseCopyright,
-				"// ¡ INSERT base/floatconv-submodule.c.\n": insertBaseFloatConvSubmoduleC,
-				"// ¡ INSERT base/intconv-submodule.c.\n":   insertBaseIntConvSubmoduleC,
-				"// ¡ INSERT base/magic-submodule.c.\n":     insertBaseMagicSubmoduleC,
-				"// ¡ INSERT base/pixconv-submodule.c.\n":   insertBasePixConvSubmoduleC,
-				"// ¡ INSERT base/utf8-submodule.c.\n":      insertBaseUTF8SubmoduleC,
+				"// ¡ INSERT InterfaceDeclarations.\n":            insertInterfaceDeclarations,
+				"// ¡ INSERT InterfaceDefinitions.\n":             insertInterfaceDefinitions,
+				"// ¡ INSERT base/all-private.h.\n":               insertBaseAllPrivateH,
+				"// ¡ INSERT base/all-public.h.\n":                insertBaseAllPublicH,
+				"// ¡ INSERT base/copyright\n":                    insertBaseCopyright,
+				"// ¡ INSERT base/floatconv-submodule.c.\n":       insertBaseFloatConvSubmoduleC,
+				"// ¡ INSERT base/intconv-submodule.c.\n":         insertBaseIntConvSubmoduleC,
+				"// ¡ INSERT base/magic-submodule.c.\n":           insertBaseMagicSubmoduleC,
+				"// ¡ INSERT base/pixconv-submodule-regular.c.\n": insertBasePixConvSubmoduleRegularC,
+				"// ¡ INSERT base/pixconv-submodule-ycck.c.\n":    insertBasePixConvSubmoduleYcckC,
+				"// ¡ INSERT base/utf8-submodule.c.\n":            insertBaseUTF8SubmoduleC,
 				"// ¡ INSERT vtable names.\n": func(b *buffer) error {
 					for _, n := range builtin.Interfaces {
 						buf.printf("const char wuffs_base__%s__vtable_name[] = "+
@@ -379,8 +380,13 @@
 	return nil
 }
 
-func insertBasePixConvSubmoduleC(buf *buffer) error {
-	buf.writes(embedBasePixConvSubmoduleC.Trim())
+func insertBasePixConvSubmoduleRegularC(buf *buffer) error {
+	buf.writes(embedBasePixConvSubmoduleRegularC.Trim())
+	return nil
+}
+
+func insertBasePixConvSubmoduleYcckC(buf *buffer) error {
+	buf.writes(embedBasePixConvSubmoduleYcckC.Trim())
 	return nil
 }
 
diff --git a/internal/cgen/embed.go b/internal/cgen/embed.go
index 24c3211..1fae457 100644
--- a/internal/cgen/embed.go
+++ b/internal/cgen/embed.go
@@ -97,8 +97,11 @@
 //go:embed base/magic-submodule.c
 var embedBaseMagicSubmoduleC EmbeddedString
 
-//go:embed base/pixconv-submodule.c
-var embedBasePixConvSubmoduleC EmbeddedString
+//go:embed base/pixconv-submodule-regular.c
+var embedBasePixConvSubmoduleRegularC EmbeddedString
+
+//go:embed base/pixconv-submodule-ycck.c
+var embedBasePixConvSubmoduleYcckC EmbeddedString
 
 //go:embed base/utf8-submodule.c
 var embedBaseUTF8SubmoduleC EmbeddedString
diff --git a/lang/builtin/builtin.go b/lang/builtin/builtin.go
index e3a172a..8f61f2e 100644
--- a/lang/builtin/builtin.go
+++ b/lang/builtin/builtin.go
@@ -643,6 +643,36 @@
 		"dst: slice u8, dst_palette: slice u8, src: roslice u8) u64",
 	"pixel_swizzler.swizzle_interleaved_transparent_black!(" +
 		"dst: slice u8, dst_palette: slice u8, num_pixels: u64) u64",
+	"pixel_swizzler.swizzle_ycck!(" +
+		"dst: ptr pixel_buffer," +
+		"dst_palette: slice u8," +
+		"width: u32[..= 0xFFFF]," +
+		"height: u32[..= 0xFFFF]," +
+		"src0: slice u8," +
+		"src1: slice u8," +
+		"src2: slice u8," +
+		"src3: slice u8," +
+		"width0: u32[..= 0x4_0000]," +
+		"width1: u32[..= 0x4_0000]," +
+		"width2: u32[..= 0x4_0000]," +
+		"width3: u32[..= 0x4_0000]," +
+		"height0: u32[..= 0x4_0000]," +
+		"height1: u32[..= 0x4_0000]," +
+		"height2: u32[..= 0x4_0000]," +
+		"height3: u32[..= 0x4_0000]," +
+		"stride0: u32," +
+		"stride1: u32," +
+		"stride2: u32," +
+		"stride3: u32," +
+		"h0: u8[..= 4]," +
+		"h1: u8[..= 4]," +
+		"h2: u8[..= 4]," +
+		"h3: u8[..= 4]," +
+		"v0: u8[..= 4]," +
+		"v1: u8[..= 4]," +
+		"v2: u8[..= 4]," +
+		"v3: u8[..= 4]," +
+		"triangle_filter_for_2to1: bool) status",
 
 	// ---- arm_crc32_utility
 
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 868f315..2d10520 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -3794,6 +3794,58 @@
   return (a << 24) | (r << 16) | (g << 8) | (b << 0);
 }
 
+// wuffs_base__color_ycc__as__color_u32 converts from YCbCr to 0xAARRGGBB. The
+// alpha bits are always 0xFF.
+static inline wuffs_base__color_u32_argb_premul  //
+wuffs_base__color_ycc__as__color_u32(uint8_t yy, uint8_t cb, uint8_t cr) {
+  // Work in 16.16 fixed point arithmetic (so that 'one half' is (1 << 15)) and
+  // bias the chroma values by 0x80.
+  uint32_t yy32 = (((uint32_t)yy) << 16) | (1 << 15);
+  uint32_t cb32 = (((uint32_t)cb) - 0x80);
+  uint32_t cr32 = (((uint32_t)cr) - 0x80);
+
+  // The formulae:
+  //
+  //  R = Y                + 1.40200 * Cr
+  //  G = Y - 0.34414 * Cb - 0.71414 * Cr
+  //  B = Y + 1.77200 * Cb
+  //
+  // When scaled by 1<<16:
+  //
+  //  0.34414 becomes 0x0581A =  22554.
+  //  0.71414 becomes 0x0B6D2 =  46802.
+  //  1.40200 becomes 0x166E9 =  91881.
+  //  1.77200 becomes 0x1C5A2 = 116130.
+  //
+  // Since we're working in 16.16 fixed point arithmetic, masking by 0x00FF0000
+  // (possibly followed by a shift) gives the relevant 8 bits per channel.
+  //
+  // However, we need to saturate for overflow (above 0x00FFFFFF, but not so
+  // high that the MSB (Most Significant Bit) is set) or for underflow (below
+  // 0x00000000 as int32_t, which means that the MSB is set as uint32_t). In
+  // both cases, some of the high 8 bits (bits 24 ..= 31) will be set.
+  //
+  // "((uint32_t)(((int32_t)x) >> 31))" just replicates x's MSB across all 32
+  // bits. Prepending that with "~" inverts those bits. Thus, "~(etc)" is
+  // either 0xFFFFFFFF (for overflow) or 0x00000000 (for underflow).
+  uint32_t rr32 = yy32 + (0x166E9 * cr32);
+  uint32_t gg32 = yy32 - (0x0581A * cb32) - (0x0B6D2 * cr32);
+  uint32_t bb32 = yy32 + (0x1C5A2 * cb32);
+  if (rr32 >> 24) {
+    rr32 = ~((uint32_t)(((int32_t)rr32) >> 31));
+  }
+  if (gg32 >> 24) {
+    gg32 = ~((uint32_t)(((int32_t)gg32) >> 31));
+  }
+  if (bb32 >> 24) {
+    bb32 = ~((uint32_t)(((int32_t)bb32) >> 31));
+  }
+  return 0xFF000000 |                  //
+         ((0x00FF0000 & rr32) >> 0) |  //
+         ((0x00FF0000 & gg32) >> 8) |  //
+         ((0x00FF0000 & bb32) >> 16);
+}
+
 // --------
 
 typedef uint8_t wuffs_base__pixel_blend;
@@ -12358,6 +12410,38 @@
     wuffs_base__slice_u8 dst_palette,
     uint64_t num_pixels);
 
+WUFFS_BASE__MAYBE_STATIC wuffs_base__status  //
+wuffs_base__pixel_swizzler__swizzle_ycck(const wuffs_base__pixel_swizzler* p,
+                                         wuffs_base__pixel_buffer* dst,
+                                         wuffs_base__slice_u8 dst_palette,
+                                         uint32_t width,
+                                         uint32_t height,
+                                         wuffs_base__slice_u8 src0,
+                                         wuffs_base__slice_u8 src1,
+                                         wuffs_base__slice_u8 src2,
+                                         wuffs_base__slice_u8 src3,
+                                         uint32_t width0,
+                                         uint32_t width1,
+                                         uint32_t width2,
+                                         uint32_t width3,
+                                         uint32_t height0,
+                                         uint32_t height1,
+                                         uint32_t height2,
+                                         uint32_t height3,
+                                         uint32_t stride0,
+                                         uint32_t stride1,
+                                         uint32_t stride2,
+                                         uint32_t stride3,
+                                         uint8_t h0,
+                                         uint8_t h1,
+                                         uint8_t h2,
+                                         uint8_t h3,
+                                         uint8_t v0,
+                                         uint8_t v1,
+                                         uint8_t v2,
+                                         uint8_t v3,
+                                         bool triangle_filter_for_2to1);
+
 // ---------------- Images (Utility)
 
 #define wuffs_base__utility__make_pixel_format wuffs_base__make_pixel_format
@@ -22250,6 +22334,257 @@
   return 0;
 }
 
+// --------
+
+static inline uint32_t  //
+wuffs_base__u32__max_of_4(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  return wuffs_base__u32__max(     //
+      wuffs_base__u32__max(a, b),  //
+      wuffs_base__u32__max(c, d));
+}
+
+static inline uint32_t  //
+wuffs_base__u32__min_of_5(uint32_t a,
+                          uint32_t b,
+                          uint32_t c,
+                          uint32_t d,
+                          uint32_t e) {
+  return wuffs_base__u32__min(          //
+      wuffs_base__u32__min(             //
+          wuffs_base__u32__min(a, b),   //
+          wuffs_base__u32__min(c, d)),  //
+      e);
+}
+
+// Writes one dst pixel per (x, y) position, advancing each src plane at its
+// own *_out_of_12 rate. Preconditions: see all the checks made in
+// wuffs_base__pixel_swizzler__swizzle_ycck before calling, e.g. (width > 0).
+static void  //
+wuffs_base__pixel_swizzler__swizzle_ycc__general__box_filter(
+    const wuffs_base__pixel_swizzler* p,
+    wuffs_base__pixel_buffer* dst,
+    wuffs_base__slice_u8 dst_palette,
+    uint32_t width,
+    uint32_t height,
+    const uint8_t* src_ptr0,
+    const uint8_t* src_ptr1,
+    const uint8_t* src_ptr2,
+    uint32_t stride0,
+    uint32_t stride1,
+    uint32_t stride2,
+    uint32_t h0_out_of_12,
+    uint32_t h1_out_of_12,
+    uint32_t h2_out_of_12,
+    uint32_t v0_out_of_12,
+    uint32_t v1_out_of_12,
+    uint32_t v2_out_of_12) {
+  uint32_t iy0 = 0;  // Per-plane vertical accumulators. Reaching 12 advances
+  uint32_t iy1 = 0;  // that plane's src_ptr by its stride and resets the
+  uint32_t iy2 = 0;  // accumulator to zero.
+  uint32_t y = 0;
+  while (true) {
+    const uint8_t* src_iter0 = src_ptr0;
+    const uint8_t* src_iter1 = src_ptr1;
+    const uint8_t* src_iter2 = src_ptr2;
+
+    uint32_t ix0 = 0;  // Per-plane horizontal accumulators. Reaching 12
+    uint32_t ix1 = 0;  // advances that plane's src_iter by one byte and
+    uint32_t ix2 = 0;  // resets the accumulator to zero.
+    uint32_t x = 0;
+    while (true) {
+      wuffs_base__pixel_buffer__set_color_u32_at(
+          dst, x, y,
+          wuffs_base__color_ycc__as__color_u32(*src_iter0, *src_iter1,
+                                               *src_iter2));
+
+      if ((x + 1) == width) {  // Done: do not step past the final column.
+        break;
+      }
+      x = x + 1;
+      ix0 += h0_out_of_12;
+      if (ix0 >= 12) {
+        ix0 = 0;
+        src_iter0++;
+      }
+      ix1 += h1_out_of_12;
+      if (ix1 >= 12) {
+        ix1 = 0;
+        src_iter1++;
+      }
+      ix2 += h2_out_of_12;
+      if (ix2 >= 12) {
+        ix2 = 0;
+        src_iter2++;
+      }
+    }
+
+    if ((y + 1) == height) {  // Done: do not step past the final row.
+      break;
+    }
+    y = y + 1;
+    iy0 += v0_out_of_12;
+    if (iy0 >= 12) {
+      iy0 = 0;
+      src_ptr0 += stride0;
+    }
+    iy1 += v1_out_of_12;
+    if (iy1 >= 12) {
+      iy1 = 0;
+      src_ptr1 += stride1;
+    }
+    iy2 += v2_out_of_12;
+    if (iy2 >= 12) {
+      iy2 = 0;
+      src_ptr2 += stride2;
+    }
+  }
+}
+
+// wuffs_base__pixel_swizzler__flattened_length is like
+// wuffs_base__table__flattened_length but returns uint64_t (not size_t) and
+// also accounts for subsampling.
+static uint64_t  //
+wuffs_base__pixel_swizzler__flattened_length(uint32_t width,
+                                             uint32_t height,
+                                             uint32_t stride,
+                                             uint32_t inv_h,
+                                             uint32_t inv_v) {
+  uint64_t scaled_width = (((uint64_t)width) + (inv_h - 1)) / inv_h;
+  uint64_t scaled_height = (((uint64_t)height) + (inv_v - 1)) / inv_v;
+  if (scaled_height <= 0) {
+    return 0;
+  }
+  return ((scaled_height - 1) * stride) + scaled_width;
+}
+
+WUFFS_BASE__MAYBE_STATIC wuffs_base__status  // Writes YCbCr planes to dst.
+wuffs_base__pixel_swizzler__swizzle_ycck(const wuffs_base__pixel_swizzler* p,
+                                         wuffs_base__pixel_buffer* dst,
+                                         wuffs_base__slice_u8 dst_palette,
+                                         uint32_t width,
+                                         uint32_t height,
+                                         wuffs_base__slice_u8 src0,
+                                         wuffs_base__slice_u8 src1,
+                                         wuffs_base__slice_u8 src2,
+                                         wuffs_base__slice_u8 src3,
+                                         uint32_t width0,
+                                         uint32_t width1,
+                                         uint32_t width2,
+                                         uint32_t width3,
+                                         uint32_t height0,
+                                         uint32_t height1,
+                                         uint32_t height2,
+                                         uint32_t height3,
+                                         uint32_t stride0,
+                                         uint32_t stride1,
+                                         uint32_t stride2,
+                                         uint32_t stride3,
+                                         uint8_t h0,
+                                         uint8_t h1,
+                                         uint8_t h2,
+                                         uint8_t h3,
+                                         uint8_t v0,
+                                         uint8_t v1,
+                                         uint8_t v2,
+                                         uint8_t v3,
+                                         bool triangle_filter_for_2to1) {
+  if (!p) {
+    return wuffs_base__make_status(wuffs_base__error__bad_receiver);
+  } else if ((h3 != 0) || (v3 != 0) || triangle_filter_for_2to1) {
+    // TODO: support the K in YCCK and support triangle_filter_for_2to1.
+    return wuffs_base__make_status(
+        wuffs_base__error__unsupported_pixel_swizzler_option);
+  } else if (!dst || (width > 0xFFFF) || (height > 0xFFFF) ||  //
+             (h0 < 1) || (h0 > 4) || (v0 < 1) || (v0 > 4) ||   //
+             (h1 < 1) || (h1 > 4) || (v1 < 1) || (v1 > 4) ||   //
+             (h2 < 1) || (h2 > 4) || (v2 < 1) || (v2 > 4)) {  // Rejects 0 too.
+    return wuffs_base__make_status(wuffs_base__error__bad_argument);
+  }
+
+  uint32_t max_incl_h = wuffs_base__u32__max_of_4(h0, h1, h2, h3);
+  uint32_t max_incl_v = wuffs_base__u32__max_of_4(v0, v1, v2, v3);
+  uint32_t inv_h0 = max_incl_h / h0;  // Divisors are non-zero: checked above.
+  uint32_t inv_h1 = max_incl_h / h1;
+  uint32_t inv_h2 = max_incl_h / h2;
+  uint32_t inv_v0 = max_incl_v / v0;
+  uint32_t inv_v1 = max_incl_v / v1;
+  uint32_t inv_v2 = max_incl_v / v2;
+  width = wuffs_base__u32__min_of_5(  //
+      width,                          //
+      width0 * inv_h0,                //
+      width1 * inv_h1,                //
+      width2 * inv_h2,                //
+      wuffs_base__pixel_config__width(&dst->pixcfg));
+  height = wuffs_base__u32__min_of_5(  //
+      height,                          //
+      height0 * inv_v0,                //
+      height1 * inv_v1,                //
+      height2 * inv_v2,                //
+      wuffs_base__pixel_config__height(&dst->pixcfg));
+
+  if (((h0 * inv_h0) != max_incl_h) ||  // Factors must divide the maximum.
+      ((h1 * inv_h1) != max_incl_h) ||  //
+      ((h2 * inv_h2) != max_incl_h) ||  //
+      ((v0 * inv_v0) != max_incl_v) ||  //
+      ((v1 * inv_v1) != max_incl_v) ||  //
+      ((v2 * inv_v2) != max_incl_v) ||  //
+      (src0.len < wuffs_base__pixel_swizzler__flattened_length(
+                      width, height, stride0, inv_h0, inv_v0)) ||
+      (src1.len < wuffs_base__pixel_swizzler__flattened_length(
+                      width, height, stride1, inv_h1, inv_v1)) ||
+      (src2.len < wuffs_base__pixel_swizzler__flattened_length(
+                      width, height, stride2, inv_h2, inv_v2))) {
+    return wuffs_base__make_status(wuffs_base__error__bad_argument);
+  }
+
+  if (wuffs_base__pixel_format__is_planar(&dst->pixcfg.private_impl.pixfmt)) {
+    // TODO: see wuffs_base__pixel_buffer__set_color_u32_at's TODO.
+    return wuffs_base__make_status(
+        wuffs_base__error__unsupported_pixel_swizzler_option);
+  }
+
+  switch (dst->pixcfg.private_impl.pixfmt.repr) {
+    case WUFFS_BASE__PIXEL_FORMAT__Y:
+    case WUFFS_BASE__PIXEL_FORMAT__Y_16LE:
+    case WUFFS_BASE__PIXEL_FORMAT__Y_16BE:
+    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_NONPREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_PREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY:
+    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
+    case WUFFS_BASE__PIXEL_FORMAT__BGR:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRX:
+    case WUFFS_BASE__PIXEL_FORMAT__RGB:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBX:
+      break;
+
+    default:
+      // TODO: see wuffs_base__pixel_buffer__set_color_u32_at's TODO.
+      return wuffs_base__make_status(
+          wuffs_base__error__unsupported_pixel_swizzler_option);
+  }
+
+  if ((width <= 0) || (height <= 0)) {  // Nothing to draw.
+    return wuffs_base__make_status(NULL);
+  }
+
+  wuffs_base__pixel_swizzler__swizzle_ycc__general__box_filter(
+      p, dst, dst_palette, width, height,  //
+      src0.ptr, src1.ptr, src2.ptr,        //
+      stride0, stride1, stride2,           //
+      (h0 * 12) / max_incl_h,              //
+      (h1 * 12) / max_incl_h,              //
+      (h2 * 12) / max_incl_h,              //
+      (v0 * 12) / max_incl_v,              //
+      (v1 * 12) / max_incl_v,              //
+      (v2 * 12) / max_incl_v);
+  return wuffs_base__make_status(NULL);
+}
+
 #endif  // !defined(WUFFS_CONFIG__MODULES) ||
         // defined(WUFFS_CONFIG__MODULE__BASE) ||
         // defined(WUFFS_CONFIG__MODULE__BASE__PIXCONV)
@@ -35637,7 +35972,8 @@
 static wuffs_base__status
 wuffs_jpeg__decoder__swizzle_colorful(
     wuffs_jpeg__decoder* self,
-    wuffs_base__pixel_buffer* a_dst);
+    wuffs_base__pixel_buffer* a_dst,
+    wuffs_base__slice_u8 a_workbuf);
 
 static uint32_t
 wuffs_jpeg__decoder__decode_mcu(
@@ -38276,7 +38612,7 @@
       }
       goto ok;
     }
-    v_status = wuffs_jpeg__decoder__swizzle_colorful(self, a_dst);
+    v_status = wuffs_jpeg__decoder__swizzle_colorful(self, a_dst, a_workbuf);
     status = v_status;
     if (wuffs_base__status__is_error(&status)) {
       goto exit;
@@ -38728,42 +39064,65 @@
 static wuffs_base__status
 wuffs_jpeg__decoder__swizzle_colorful(
     wuffs_jpeg__decoder* self,
-    wuffs_base__pixel_buffer* a_dst) {
-  wuffs_base__pixel_format v_dst_pixfmt = {0};
-  uint32_t v_dst_bits_per_pixel = 0;
-  uint32_t v_dst_bytes_per_pixel = 0;
-  wuffs_base__table_u8 v_tab = {0};
-  wuffs_base__slice_u8 v_dst = {0};
-  uint8_t v_src[4] = {0};
-  uint32_t v_y = 0;
-  uint32_t v_x = 0;
-  uint64_t v_d = 0;
+    wuffs_base__pixel_buffer* a_dst,
+    wuffs_base__slice_u8 a_workbuf) {  // Passes a_workbuf's component slices to swizzle_ycck, with a_dst as the destination.
+  wuffs_base__slice_u8 v_src0 = {0};  // Each v_srcN stays empty unless its bounds check below passes.
+  wuffs_base__slice_u8 v_src1 = {0};
+  wuffs_base__slice_u8 v_src2 = {0};
+  wuffs_base__slice_u8 v_src3 = {0};
+  wuffs_base__status v_status = wuffs_base__make_status(NULL);
 
-  v_dst_pixfmt = wuffs_base__pixel_buffer__pixel_format(a_dst);
-  v_dst_bits_per_pixel = wuffs_base__pixel_format__bits_per_pixel(&v_dst_pixfmt);
-  if ((v_dst_bits_per_pixel & 7) != 0) {
-    return wuffs_base__make_status(wuffs_base__error__unsupported_option);
+  if ((self->private_impl.f_components_workbuf_offsets[0] <= self->private_impl.f_components_workbuf_offsets[1]) && (self->private_impl.f_components_workbuf_offsets[1] <= ((uint64_t)(a_workbuf.len)))) {
+    v_src0 = wuffs_base__slice_u8__subslice_ij(a_workbuf,  // a_workbuf[offsets[0] .. offsets[1]]
+        self->private_impl.f_components_workbuf_offsets[0],
+        self->private_impl.f_components_workbuf_offsets[1]);
   }
-  v_dst_bytes_per_pixel = (v_dst_bits_per_pixel / 8);
-  v_tab = wuffs_base__pixel_buffer__plane(a_dst, 0);
-  v_y = 0;
-  while (v_y < self->private_impl.f_height) {
-    v_x = 0;
-    while (v_x < self->private_impl.f_width) {
-      v_dst = wuffs_base__table_u8__row_u32(v_tab, v_y);
-      v_d = ((uint64_t)((v_x * v_dst_bytes_per_pixel)));
-      if (v_d < ((uint64_t)(v_dst.len))) {
-        v_src[0] = ((uint8_t)((v_x & 255)));
-        v_src[1] = 127;
-        v_src[2] = ((uint8_t)((v_y & 255)));
-        v_src[3] = 255;
-        wuffs_base__pixel_swizzler__swizzle_interleaved_from_slice(&self->private_impl.f_swizzler, wuffs_base__slice_u8__subslice_i(v_dst, v_d), wuffs_base__pixel_buffer__palette_or_else(a_dst, wuffs_base__make_slice_u8(self->private_data.f_dst_palette, 1024)), wuffs_base__make_slice_u8(v_src, 4));
-      }
-      v_x += 1;
-    }
-    v_y += 1;
+  if ((self->private_impl.f_components_workbuf_offsets[1] <= self->private_impl.f_components_workbuf_offsets[2]) && (self->private_impl.f_components_workbuf_offsets[2] <= ((uint64_t)(a_workbuf.len)))) {
+    v_src1 = wuffs_base__slice_u8__subslice_ij(a_workbuf,  // a_workbuf[offsets[1] .. offsets[2]]
+        self->private_impl.f_components_workbuf_offsets[1],
+        self->private_impl.f_components_workbuf_offsets[2]);
   }
-  return wuffs_base__make_status(NULL);
+  if ((self->private_impl.f_components_workbuf_offsets[2] <= self->private_impl.f_components_workbuf_offsets[3]) && (self->private_impl.f_components_workbuf_offsets[3] <= ((uint64_t)(a_workbuf.len)))) {
+    v_src2 = wuffs_base__slice_u8__subslice_ij(a_workbuf,  // a_workbuf[offsets[2] .. offsets[3]]
+        self->private_impl.f_components_workbuf_offsets[2],
+        self->private_impl.f_components_workbuf_offsets[3]);
+  }
+  if ((self->private_impl.f_components_workbuf_offsets[3] <= self->private_impl.f_components_workbuf_offsets[4]) && (self->private_impl.f_components_workbuf_offsets[4] <= ((uint64_t)(a_workbuf.len)))) {
+    v_src3 = wuffs_base__slice_u8__subslice_ij(a_workbuf,  // a_workbuf[offsets[3] .. offsets[4]]
+        self->private_impl.f_components_workbuf_offsets[3],
+        self->private_impl.f_components_workbuf_offsets[4]);
+  }
+  v_status = wuffs_base__pixel_swizzler__swizzle_ycck(&self->private_impl.f_swizzler,
+      a_dst,  // dst
+      wuffs_base__pixel_buffer__palette_or_else(a_dst, wuffs_base__make_slice_u8(self->private_data.f_dst_palette, 1024)),  // dst_palette
+      self->private_impl.f_width,  // width
+      self->private_impl.f_height,  // height
+      v_src0,  // src0 .. src3
+      v_src1,
+      v_src2,
+      v_src3,
+      (8 * self->private_impl.f_width_in_mcus * ((uint32_t)(self->private_impl.f_components_h[0]))),  // width0 .. width3
+      (8 * self->private_impl.f_width_in_mcus * ((uint32_t)(self->private_impl.f_components_h[1]))),
+      (8 * self->private_impl.f_width_in_mcus * ((uint32_t)(self->private_impl.f_components_h[2]))),
+      (8 * self->private_impl.f_width_in_mcus * ((uint32_t)(self->private_impl.f_components_h[3]))),
+      (8 * self->private_impl.f_height_in_mcus * ((uint32_t)(self->private_impl.f_components_v[0]))),  // height0 .. height3
+      (8 * self->private_impl.f_height_in_mcus * ((uint32_t)(self->private_impl.f_components_v[1]))),
+      (8 * self->private_impl.f_height_in_mcus * ((uint32_t)(self->private_impl.f_components_v[2]))),
+      (8 * self->private_impl.f_height_in_mcus * ((uint32_t)(self->private_impl.f_components_v[3]))),
+      self->private_impl.f_components_workbuf_strides[0],  // stride0 .. stride3
+      self->private_impl.f_components_workbuf_strides[1],
+      self->private_impl.f_components_workbuf_strides[2],
+      self->private_impl.f_components_workbuf_strides[3],
+      self->private_impl.f_components_h[0],  // h0 .. h3
+      self->private_impl.f_components_h[1],
+      self->private_impl.f_components_h[2],
+      self->private_impl.f_components_h[3],
+      self->private_impl.f_components_v[0],  // v0 .. v3
+      self->private_impl.f_components_v[1],
+      self->private_impl.f_components_v[2],
+      self->private_impl.f_components_v[3],
+      false);  // triangle_filter_for_2to1
+  return wuffs_base__status__ensure_not_a_suspension(v_status);  // NOTE(review): presumably turns a suspension status into an error - confirm.
 }
 
 // -------- func jpeg.decoder.frame_dirty_rect
diff --git a/std/jpeg/decode_jpeg.wuffs b/std/jpeg/decode_jpeg.wuffs
index 57e5ab2..297d1dc 100644
--- a/std/jpeg/decode_jpeg.wuffs
+++ b/std/jpeg/decode_jpeg.wuffs
@@ -1083,7 +1083,7 @@
         status = this.swizzle_gray!(dst: args.dst, workbuf: args.workbuf)
         return status
     }
-    status = this.swizzle_colorful!(dst: args.dst)
+    status = this.swizzle_colorful!(dst: args.dst, workbuf: args.workbuf)
     return status
 }
 
@@ -1402,53 +1402,64 @@
     return ok
 }
 
-pri func decoder.swizzle_colorful!(dst: ptr base.pixel_buffer) base.status {
-    var dst_pixfmt          : base.pixel_format
-    var dst_bits_per_pixel  : base.u32[..= 256]
-    var dst_bytes_per_pixel : base.u32[..= 32]
-    var tab                 : table base.u8
-    var dst                 : slice base.u8
-    var src                 : array[4] base.u8
-    var y                   : base.u32
-    var x                   : base.u32
-    var d                   : base.u64
+pri func decoder.swizzle_colorful!(dst: ptr base.pixel_buffer, workbuf: slice base.u8) base.status {
+    var src0   : slice base.u8  // args.workbuf[offsets[0] .. offsets[1]], if in bounds.
+    var src1   : slice base.u8  // args.workbuf[offsets[1] .. offsets[2]], if in bounds.
+    var src2   : slice base.u8  // args.workbuf[offsets[2] .. offsets[3]], if in bounds.
+    var src3   : slice base.u8  // args.workbuf[offsets[3] .. offsets[4]], if in bounds.
+    var status : base.status
 
-    // TODO: the dst_pixfmt variable shouldn't be necessary. We should be able
-    // to chain the two calls: "args.dst.pixel_format().bits_per_pixel()".
-    dst_pixfmt = args.dst.pixel_format()
-    dst_bits_per_pixel = dst_pixfmt.bits_per_pixel()
-    if (dst_bits_per_pixel & 7) <> 0 {
-        return base."#unsupported option"
+    if (this.components_workbuf_offsets[0] <= this.components_workbuf_offsets[1]) and
+            (this.components_workbuf_offsets[1] <= args.workbuf.length()) {
+        src0 = args.workbuf[this.components_workbuf_offsets[0] .. this.components_workbuf_offsets[1]]
     }
-    dst_bytes_per_pixel = dst_bits_per_pixel / 8
 
-    // TODO: actually decode the pixels. Until then, fill with gradient tiles.
-    tab = args.dst.plane(p: 0)
-    y = 0
-    while y < this.height {
-        assert y < 0xFFFF via "a < b: a < c; c <= b"(c: this.height)
-        x = 0
-        while x < this.width,
-                inv y < 0xFFFF,
-        {
-            assert x < 0xFFFF via "a < b: a < c; c <= b"(c: this.width)
-            dst = tab.row_u32(y: y)
-            d = (x * dst_bytes_per_pixel) as base.u64
-            if d < dst.length() {
-                src[0] = (x & 0xFF) as base.u8
-                src[1] = 0x7F
-                src[2] = (y & 0xFF) as base.u8
-                src[3] = 0xFF
-                this.swizzler.swizzle_interleaved_from_slice!(
-                        dst: dst[d ..],
-                        dst_palette: args.dst.palette_or_else(fallback: this.dst_palette[..]),
-                        src: src[.. 4])
-            }
-            x += 1
-        } endwhile
-        y += 1
-    } endwhile
-    return ok
+    if (this.components_workbuf_offsets[1] <= this.components_workbuf_offsets[2]) and
+            (this.components_workbuf_offsets[2] <= args.workbuf.length()) {
+        src1 = args.workbuf[this.components_workbuf_offsets[1] .. this.components_workbuf_offsets[2]]
+    }
+
+    if (this.components_workbuf_offsets[2] <= this.components_workbuf_offsets[3]) and
+            (this.components_workbuf_offsets[3] <= args.workbuf.length()) {
+        src2 = args.workbuf[this.components_workbuf_offsets[2] .. this.components_workbuf_offsets[3]]
+    }
+
+    if (this.components_workbuf_offsets[3] <= this.components_workbuf_offsets[4]) and
+            (this.components_workbuf_offsets[4] <= args.workbuf.length()) {
+        src3 = args.workbuf[this.components_workbuf_offsets[3] .. this.components_workbuf_offsets[4]]
+    }
+
+    status = this.swizzler.swizzle_ycck!(  // Passes the four slices to the swizzler, with args.dst as the destination.
+            dst: args.dst,
+            dst_palette: args.dst.palette_or_else(fallback: this.dst_palette[..]),
+            width: this.width,
+            height: this.height,
+            src0: src0,
+            src1: src1,
+            src2: src2,
+            src3: src3,
+            width0: 8 * this.width_in_mcus * (this.components_h[0] as base.u32),
+            width1: 8 * this.width_in_mcus * (this.components_h[1] as base.u32),
+            width2: 8 * this.width_in_mcus * (this.components_h[2] as base.u32),
+            width3: 8 * this.width_in_mcus * (this.components_h[3] as base.u32),
+            height0: 8 * this.height_in_mcus * (this.components_v[0] as base.u32),
+            height1: 8 * this.height_in_mcus * (this.components_v[1] as base.u32),
+            height2: 8 * this.height_in_mcus * (this.components_v[2] as base.u32),
+            height3: 8 * this.height_in_mcus * (this.components_v[3] as base.u32),
+            stride0: this.components_workbuf_strides[0],
+            stride1: this.components_workbuf_strides[1],
+            stride2: this.components_workbuf_strides[2],
+            stride3: this.components_workbuf_strides[3],
+            h0: this.components_h[0],
+            h1: this.components_h[1],
+            h2: this.components_h[2],
+            h3: this.components_h[3],
+            v0: this.components_v[0],
+            v1: this.components_v[1],
+            v2: this.components_v[2],
+            v3: this.components_v[3],
+            triangle_filter_for_2to1: false)
+    return status  // The swizzler's status is returned unchanged.
 }
 
 pub func decoder.frame_dirty_rect() base.rect_ie_u32 {
diff --git a/test/c/std/jpeg.c b/test/c/std/jpeg.c
index 2c399c1..aba5061 100644
--- a/test/c/std/jpeg.c
+++ b/test/c/std/jpeg.c
@@ -96,7 +96,7 @@
                    WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED));
   return do_test__wuffs_base__image_decoder(
       wuffs_jpeg__decoder__upcast_as__wuffs_base__image_decoder(&dec),
-      "test/data/bricks-color.jpeg", 0, SIZE_MAX, 160, 120, 0xFF777F9F);
+      "test/data/bricks-color.jpeg", 0, SIZE_MAX, 160, 120, 0xFF012466);
 }
 
 const char*  //
diff --git a/test/c/std/wbmp.c b/test/c/std/wbmp.c
index f18b2a5..640d60d 100644
--- a/test/c/std/wbmp.c
+++ b/test/c/std/wbmp.c
@@ -115,6 +115,97 @@
 }
 
 const char*  //
+test_wuffs_color_ycc_as_color_u32() {  // Table-driven conversion check.
+  CHECK_FOCUS(__func__);
+
+  struct {  // One case: (yy, cb, cr) inputs and the wanted result.
+    uint8_t yy;
+    uint8_t cb;
+    uint8_t cr;
+    wuffs_base__color_u32_argb_premul want;
+  } test_cases[] = {
+      {0x00, 0x00, 0x00, 0xFF008700},  //
+      {0x00, 0x00, 0x55, 0xFF004B00},  //
+      {0x00, 0x00, 0xAA, 0xFF3B0E00},  //
+      {0x00, 0x00, 0xFF, 0xFFB20000},  //
+      {0x00, 0x55, 0x00, 0xFF006A00},  //
+      {0x00, 0x55, 0x55, 0xFF002E00},  //
+      {0x00, 0x55, 0xAA, 0xFF3B0000},  //
+      {0x00, 0x55, 0xFF, 0xFFB20000},  //
+      {0x00, 0xAA, 0x00, 0xFF004D4A},  //
+      {0x00, 0xAA, 0x55, 0xFF00104A},  //
+      {0x00, 0xAA, 0xAA, 0xFF3B004A},  //
+      {0x00, 0xAA, 0xFF, 0xFFB2004A},  //
+      {0x00, 0xFF, 0x00, 0xFF0030E1},  //
+      {0x00, 0xFF, 0x55, 0xFF0000E1},  //
+      {0x00, 0xFF, 0xAA, 0xFF3B00E1},  //
+      {0x00, 0xFF, 0xFF, 0xFFB200E1},  //
+      {0x55, 0x00, 0x00, 0xFF00DC00},  //
+      {0x55, 0x00, 0x55, 0xFF19A000},  //
+      {0x55, 0x00, 0xAA, 0xFF906300},  //
+      {0x55, 0x00, 0xFF, 0xFFFF2600},  //
+      {0x55, 0x55, 0x00, 0xFF00BF09},  //
+      {0x55, 0x55, 0x55, 0xFF198309},  //
+      {0x55, 0x55, 0xAA, 0xFF904609},  //
+      {0x55, 0x55, 0xFF, 0xFFFF0909},  //
+      {0x55, 0xAA, 0x00, 0xFF00A29F},  //
+      {0x55, 0xAA, 0x55, 0xFF19659F},  //
+      {0x55, 0xAA, 0xAA, 0xFF90299F},  //
+      {0x55, 0xAA, 0xFF, 0xFFFF009F},  //
+      {0x55, 0xFF, 0x00, 0xFF0085FF},  //
+      {0x55, 0xFF, 0x55, 0xFF1948FF},  //
+      {0x55, 0xFF, 0xAA, 0xFF900BFF},  //
+      {0x55, 0xFF, 0xFF, 0xFFFF00FF},  //
+      {0xAA, 0x00, 0x00, 0xFF00FF00},  //
+      {0xAA, 0x00, 0x55, 0xFF6EF500},  //
+      {0xAA, 0x00, 0xAA, 0xFFE5B800},  //
+      {0xAA, 0x00, 0xFF, 0xFFFF7B00},  //
+      {0xAA, 0x55, 0x00, 0xFF00FF5E},  //
+      {0xAA, 0x55, 0x55, 0xFF6ED85E},  //
+      {0xAA, 0x55, 0xAA, 0xFFE59B5E},  //
+      {0xAA, 0x55, 0xFF, 0xFFFF5E5E},  //
+      {0xAA, 0xAA, 0x00, 0xFF00F7F4},  //
+      {0xAA, 0xAA, 0x55, 0xFF6EBAF4},  //
+      {0xAA, 0xAA, 0xAA, 0xFFE57EF4},  //
+      {0xAA, 0xAA, 0xFF, 0xFFFF41F4},  //
+      {0xAA, 0xFF, 0x00, 0xFF00DAFF},  //
+      {0xAA, 0xFF, 0x55, 0xFF6E9DFF},  //
+      {0xAA, 0xFF, 0xAA, 0xFFE560FF},  //
+      {0xAA, 0xFF, 0xFF, 0xFFFF24FF},  //
+      {0xFF, 0x00, 0x00, 0xFF4CFF1C},  //
+      {0xFF, 0x00, 0x55, 0xFFC3FF1C},  //
+      {0xFF, 0x00, 0xAA, 0xFFFFFF1C},  //
+      {0xFF, 0x00, 0xFF, 0xFFFFD01C},  //
+      {0xFF, 0x55, 0x00, 0xFF4CFFB3},  //
+      {0xFF, 0x55, 0x55, 0xFFC3FFB3},  //
+      {0xFF, 0x55, 0xAA, 0xFFFFF0B3},  //
+      {0xFF, 0x55, 0xFF, 0xFFFFB3B3},  //
+      {0xFF, 0xAA, 0x00, 0xFF4CFFFF},  //
+      {0xFF, 0xAA, 0x55, 0xFFC3FFFF},  //
+      {0xFF, 0xAA, 0xAA, 0xFFFFD3FF},  //
+      {0xFF, 0xAA, 0xFF, 0xFFFF96FF},  //
+      {0xFF, 0xFF, 0x00, 0xFF4CFFFF},  //
+      {0xFF, 0xFF, 0x55, 0xFFC3F2FF},  //
+      {0xFF, 0xFF, 0xAA, 0xFFFFB5FF},  //
+      {0xFF, 0xFF, 0xFF, 0xFFFF79FF},  //
+  };
+
+  for (size_t tc = 0; tc < WUFFS_TESTLIB_ARRAY_SIZE(test_cases); tc++) {
+    wuffs_base__color_u32_argb_premul have =
+        wuffs_base__color_ycc__as__color_u32(
+            test_cases[tc].yy, test_cases[tc].cb, test_cases[tc].cr);
+    if (have != test_cases[tc].want) {  // Mismatch: report via RETURN_FAIL.
+      RETURN_FAIL(
+          "wuffs_base__color_ycc__as__color_u32(0x%02X, 0x%02X, 0x%02X): have "
+          "0x%08" PRIX32 ", want 0x%08" PRIX32,
+          (int)test_cases[tc].yy, (int)test_cases[tc].cb,
+          (int)test_cases[tc].cr, have, test_cases[tc].want);
+    }
+  }
+  return NULL;  // No mismatches found.
+}
+
+const char*  //
 test_wuffs_pixel_buffer_fill_rect() {
   CHECK_FOCUS(__func__);
 
@@ -675,6 +766,7 @@
     // These pixel_buffer / pixel_swizzler tests are really testing the Wuffs
     // base library. They aren't specific to the std/wbmp code, but putting
     // them here is as good as any other place.
+    test_wuffs_color_ycc_as_color_u32,
     test_wuffs_pixel_buffer_fill_rect,
     test_wuffs_pixel_swizzler_swizzle,
 
diff --git a/test/nia-checksums-of-data.txt b/test/nia-checksums-of-data.txt
index 1eb4fa1..f067494 100644
--- a/test/nia-checksums-of-data.txt
+++ b/test/nia-checksums-of-data.txt
@@ -25,7 +25,7 @@
 e08a7cc8 test/data/artificial-png/exif.png
 e08a7cc8 test/data/artificial-png/key-value-pairs.png
 076cb375 test/data/bricks-color.bmp
-84e5ba7c test/data/bricks-color.jpeg
+96a13918 test/data/bricks-color.jpeg
 076cb375 test/data/bricks-color.png
 076cb375 test/data/bricks-color.tga
 f36c2e80 test/data/bricks-dither.bmp
@@ -49,27 +49,27 @@
 3014b4c0 test/data/gifplayer-muybridge.gif
 030f5a48 test/data/harvesters.bmp
 c18b3d5a test/data/harvesters.gif
-58e255d4 test/data/harvesters.jpeg
+3f0a404d test/data/harvesters.jpeg
 030f5a48 test/data/harvesters.png
 e776c90f test/data/hat.bmp
 6dcba6a4 test/data/hat.gif
-486c598d test/data/hat.jpeg
+75f4f686 test/data/hat.jpeg
 e776c90f test/data/hat.png
 d30bfe5d test/data/hat.wbmp
 33a44f22 test/data/hibiscus.primitive.bmp
 25e212b3 test/data/hibiscus.primitive.gif
-5d1391ce test/data/hibiscus.primitive.jpeg
+9624fa44 test/data/hibiscus.primitive.jpeg
 33a44f22 test/data/hibiscus.primitive.png
 60040742 test/data/hibiscus.regular.bmp
 b727da8b test/data/hibiscus.regular.gif
-5d1391ce test/data/hibiscus.regular.jpeg
+886ee1a1 test/data/hibiscus.regular.jpeg
 60040742 test/data/hibiscus.regular.png
 dcbb225a test/data/hippopotamus.bmp
 ed4b78fc test/data/hippopotamus.interlaced.gif
 dcbb225a test/data/hippopotamus.interlaced.png
 c3c4bd65 test/data/hippopotamus.interlaced.truncated.gif
 7c6a771b test/data/hippopotamus.interlaced.truncated.png
-cbd26e18 test/data/hippopotamus.jpeg
+96bdbbb3 test/data/hippopotamus.jpeg
 d3bbed27 test/data/hippopotamus.masked-with-muybridge.gif
 7e6acf01 test/data/hippopotamus.masked-with-muybridge.png
 dcbb225a test/data/hippopotamus.nie
@@ -81,7 +81,7 @@
 db2733f5 test/data/muybridge.gif
 bf7e8c96 test/data/pjw-thumbnail.bmp
 bf7e8c96 test/data/pjw-thumbnail.gif
-485d4e58 test/data/pjw-thumbnail.jpeg
+7c67a37f test/data/pjw-thumbnail.jpeg
 bf7e8c96 test/data/pjw-thumbnail.png
 38cb4cbf test/data/red-blue-gradient.dcip3d65-no-chrm-no-gama.png
 38cb4cbf test/data/red-blue-gradient.gamma1dot0.png