Fix pixel_swizzler dst offset multiplier (3 bytes per pixel, not 4)
diff --git a/internal/cgen/base/image-impl.c b/internal/cgen/base/image-impl.c
index 61ebd14..2e688f8 100644
--- a/internal/cgen/base/image-impl.c
+++ b/internal/cgen/base/image-impl.c
@@ -677,22 +677,22 @@
     uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
                                                           ((size_t)s[0] * 4));
     if (s0) {
-      wuffs_base__store_u24le__no_bounds_check(d + (0 * 4), s0);
+      wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);
     }
     uint32_t s1 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
                                                           ((size_t)s[1] * 4));
     if (s1) {
-      wuffs_base__store_u24le__no_bounds_check(d + (1 * 4), s1);
+      wuffs_base__store_u24le__no_bounds_check(d + (1 * 3), s1);
     }
     uint32_t s2 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
                                                           ((size_t)s[2] * 4));
     if (s2) {
-      wuffs_base__store_u24le__no_bounds_check(d + (2 * 4), s2);
+      wuffs_base__store_u24le__no_bounds_check(d + (2 * 3), s2);
     }
     uint32_t s3 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
                                                           ((size_t)s[3] * 4));
     if (s3) {
-      wuffs_base__store_u24le__no_bounds_check(d + (3 * 4), s3);
+      wuffs_base__store_u24le__no_bounds_check(d + (3 * 3), s3);
     }
 
     s += loop_unroll_count * 1;
@@ -704,7 +704,7 @@
     uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
                                                           ((size_t)s[0] * 4));
     if (s0) {
-      wuffs_base__store_u24le__no_bounds_check(d + (0 * 4), s0);
+      wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);
     }
 
     s += 1 * 1;
diff --git a/internal/cgen/data.go b/internal/cgen/data.go
index 358f99e..3a3a10d 100644
--- a/internal/cgen/data.go
+++ b/internal/cgen/data.go
@@ -60,8 +60,8 @@
 	"" +
 	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__xxx__index__src(wuffs_base__slice_u8 dst,\n                                            wuffs_base__slice_u8 dst_palette,\n                                            wuffs_base__slice_u8 src) {\n  if (dst_palette.len != 1024) {\n    return 0;\n  }\n  size_t dst_len3 = dst.len / 3;\n  size_t len = dst_len3 < src.len ? dst_len3 : src.len;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  const size_t loop_unroll_count = 4;\n\n  // The comparison in the while condition is \">\", not \">=\", because with\n  // \">=\", the last 4-byte store could write past the end of the dst slice.\n  //\n  // Each 4-byte store writes one too many bytes, but a subsequent store\n  // will overwrite that with the correct byte. There is always another\n  // store, whether a 4-byte store in this loop or a 1-byte store in the\n  // next loop.\n  while (n > loop_unroll_count) {\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (0 * 3), wuffs_base__load_u32le__no_bounds_c" +
 	"heck(\n                         dst_palette.ptr + ((size_t)s[0] * 4)));\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (1 * 3), wuffs_base__load_u32le__no_bounds_check(\n                         dst_palette.ptr + ((size_t)s[1] * 4)));\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (2 * 3), wuffs_base__load_u32le__no_bounds_check(\n                         dst_palette.ptr + ((size_t)s[2] * 4)));\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (3 * 3), wuffs_base__load_u32le__no_bounds_check(\n                         dst_palette.ptr + ((size_t)s[3] * 4)));\n\n    s += loop_unroll_count * 1;\n    d += loop_unroll_count * 3;\n    n -= loop_unroll_count;\n  }\n\n  while (n >= 1) {\n    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[0] * 4));\n    wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);\n\n    s += 1 * 1;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__" +
-	"pixel_swizzler__xxx__index_binary_alpha__src_over(\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  if (dst_palette.len != 1024) {\n    return 0;\n  }\n  size_t dst_len3 = dst.len / 3;\n  size_t len = dst_len3 < src.len ? dst_len3 : src.len;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  const size_t loop_unroll_count = 4;\n\n  while (n >= loop_unroll_count) {\n    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[0] * 4));\n    if (s0) {\n      wuffs_base__store_u24le__no_bounds_check(d + (0 * 4), s0);\n    }\n    uint32_t s1 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[1] * 4));\n    if (s1) {\n      wuffs_base__store_u24le__no_bounds_check(d + (1 * 4), s1);\n    }\n    uint32_t s2 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                    " +
-	"                      ((size_t)s[2] * 4));\n    if (s2) {\n      wuffs_base__store_u24le__no_bounds_check(d + (2 * 4), s2);\n    }\n    uint32_t s3 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[3] * 4));\n    if (s3) {\n      wuffs_base__store_u24le__no_bounds_check(d + (3 * 4), s3);\n    }\n\n    s += loop_unroll_count * 1;\n    d += loop_unroll_count * 3;\n    n -= loop_unroll_count;\n  }\n\n  while (n >= 1) {\n    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[0] * 4));\n    if (s0) {\n      wuffs_base__store_u24le__no_bounds_check(d + (0 * 4), s0);\n    }\n\n    s += 1 * 1;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
+	"pixel_swizzler__xxx__index_binary_alpha__src_over(\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  if (dst_palette.len != 1024) {\n    return 0;\n  }\n  size_t dst_len3 = dst.len / 3;\n  size_t len = dst_len3 < src.len ? dst_len3 : src.len;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  const size_t loop_unroll_count = 4;\n\n  while (n >= loop_unroll_count) {\n    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[0] * 4));\n    if (s0) {\n      wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);\n    }\n    uint32_t s1 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[1] * 4));\n    if (s1) {\n      wuffs_base__store_u24le__no_bounds_check(d + (1 * 3), s1);\n    }\n    uint32_t s2 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                    " +
+	"                      ((size_t)s[2] * 4));\n    if (s2) {\n      wuffs_base__store_u24le__no_bounds_check(d + (2 * 3), s2);\n    }\n    uint32_t s3 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[3] * 4));\n    if (s3) {\n      wuffs_base__store_u24le__no_bounds_check(d + (3 * 3), s3);\n    }\n\n    s += loop_unroll_count * 1;\n    d += loop_unroll_count * 3;\n    n -= loop_unroll_count;\n  }\n\n  while (n >= 1) {\n    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[0] * 4));\n    if (s0) {\n      wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);\n    }\n\n    s += 1 * 1;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
 	"" +
 	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__xxxx__index__src(wuffs_base__slice_u8 dst,\n                                             wuffs_base__slice_u8 dst_palette,\n                                             wuffs_base__slice_u8 src) {\n  if (dst_palette.len != 1024) {\n    return 0;\n  }\n  size_t dst_len4 = dst.len / 4;\n  size_t len = dst_len4 < src.len ? dst_len4 : src.len;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  const size_t loop_unroll_count = 4;\n\n  while (n >= loop_unroll_count) {\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (0 * 4), wuffs_base__load_u32le__no_bounds_check(\n                         dst_palette.ptr + ((size_t)s[0] * 4)));\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (1 * 4), wuffs_base__load_u32le__no_bounds_check(\n                         dst_palette.ptr + ((size_t)s[1] * 4)));\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (2 * 4), wuffs_base__load_u32le__no_bounds_check(\n                         dst_pale" +
 	"tte.ptr + ((size_t)s[2] * 4)));\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (3 * 4), wuffs_base__load_u32le__no_bounds_check(\n                         dst_palette.ptr + ((size_t)s[3] * 4)));\n\n    s += loop_unroll_count * 1;\n    d += loop_unroll_count * 4;\n    n -= loop_unroll_count;\n  }\n\n  while (n >= 1) {\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (0 * 4), wuffs_base__load_u32le__no_bounds_check(\n                         dst_palette.ptr + ((size_t)s[0] * 4)));\n\n    s += 1 * 1;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over(\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  if (dst_palette.len != 1024) {\n    return 0;\n  }\n  size_t dst_len4 = dst.len / 4;\n  size_t len = dst_len4 < src.len ? dst_len4 : src.len;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  const size_t loop_unroll_count = 4;\n\n  while (n >= loop_unroll_count)" +
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 5e0fe7e..73d07eb 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -9183,22 +9183,22 @@
     uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
                                                           ((size_t)s[0] * 4));
     if (s0) {
-      wuffs_base__store_u24le__no_bounds_check(d + (0 * 4), s0);
+      wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);
     }
     uint32_t s1 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
                                                           ((size_t)s[1] * 4));
     if (s1) {
-      wuffs_base__store_u24le__no_bounds_check(d + (1 * 4), s1);
+      wuffs_base__store_u24le__no_bounds_check(d + (1 * 3), s1);
     }
     uint32_t s2 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
                                                           ((size_t)s[2] * 4));
     if (s2) {
-      wuffs_base__store_u24le__no_bounds_check(d + (2 * 4), s2);
+      wuffs_base__store_u24le__no_bounds_check(d + (2 * 3), s2);
     }
     uint32_t s3 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
                                                           ((size_t)s[3] * 4));
     if (s3) {
-      wuffs_base__store_u24le__no_bounds_check(d + (3 * 4), s3);
+      wuffs_base__store_u24le__no_bounds_check(d + (3 * 3), s3);
     }
 
     s += loop_unroll_count * 1;
@@ -9210,7 +9210,7 @@
     uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
                                                           ((size_t)s[0] * 4));
     if (s0) {
-      wuffs_base__store_u24le__no_bounds_check(d + (0 * 4), s0);
+      wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);
     }
 
     s += 1 * 1;