Fix pixel_swizzler xxx binary-alpha dst offset scale (3, not 4)
diff --git a/internal/cgen/base/image-impl.c b/internal/cgen/base/image-impl.c
index 61ebd14..2e688f8 100644
--- a/internal/cgen/base/image-impl.c
+++ b/internal/cgen/base/image-impl.c
@@ -677,22 +677,22 @@
uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
((size_t)s[0] * 4));
if (s0) {
- wuffs_base__store_u24le__no_bounds_check(d + (0 * 4), s0);
+ wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);
}
uint32_t s1 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
((size_t)s[1] * 4));
if (s1) {
- wuffs_base__store_u24le__no_bounds_check(d + (1 * 4), s1);
+ wuffs_base__store_u24le__no_bounds_check(d + (1 * 3), s1);
}
uint32_t s2 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
((size_t)s[2] * 4));
if (s2) {
- wuffs_base__store_u24le__no_bounds_check(d + (2 * 4), s2);
+ wuffs_base__store_u24le__no_bounds_check(d + (2 * 3), s2);
}
uint32_t s3 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
((size_t)s[3] * 4));
if (s3) {
- wuffs_base__store_u24le__no_bounds_check(d + (3 * 4), s3);
+ wuffs_base__store_u24le__no_bounds_check(d + (3 * 3), s3);
}
s += loop_unroll_count * 1;
@@ -704,7 +704,7 @@
uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
((size_t)s[0] * 4));
if (s0) {
- wuffs_base__store_u24le__no_bounds_check(d + (0 * 4), s0);
+ wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);
}
s += 1 * 1;
diff --git a/internal/cgen/data.go b/internal/cgen/data.go
index 358f99e..3a3a10d 100644
--- a/internal/cgen/data.go
+++ b/internal/cgen/data.go
@@ -60,8 +60,8 @@
"" +
"// --------\n\nstatic uint64_t //\nwuffs_base__pixel_swizzler__xxx__index__src(wuffs_base__slice_u8 dst,\n wuffs_base__slice_u8 dst_palette,\n wuffs_base__slice_u8 src) {\n if (dst_palette.len != 1024) {\n return 0;\n }\n size_t dst_len3 = dst.len / 3;\n size_t len = dst_len3 < src.len ? dst_len3 : src.len;\n uint8_t* d = dst.ptr;\n uint8_t* s = src.ptr;\n size_t n = len;\n\n const size_t loop_unroll_count = 4;\n\n // The comparison in the while condition is \">\", not \">=\", because with\n // \">=\", the last 4-byte store could write past the end of the dst slice.\n //\n // Each 4-byte store writes one too many bytes, but a subsequent store\n // will overwrite that with the correct byte. There is always another\n // store, whether a 4-byte store in this loop or a 1-byte store in the\n // next loop.\n while (n > loop_unroll_count) {\n wuffs_base__store_u32le__no_bounds_check(\n d + (0 * 3), wuffs_base__load_u32le__no_bounds_c" +
"heck(\n dst_palette.ptr + ((size_t)s[0] * 4)));\n wuffs_base__store_u32le__no_bounds_check(\n d + (1 * 3), wuffs_base__load_u32le__no_bounds_check(\n dst_palette.ptr + ((size_t)s[1] * 4)));\n wuffs_base__store_u32le__no_bounds_check(\n d + (2 * 3), wuffs_base__load_u32le__no_bounds_check(\n dst_palette.ptr + ((size_t)s[2] * 4)));\n wuffs_base__store_u32le__no_bounds_check(\n d + (3 * 3), wuffs_base__load_u32le__no_bounds_check(\n dst_palette.ptr + ((size_t)s[3] * 4)));\n\n s += loop_unroll_count * 1;\n d += loop_unroll_count * 3;\n n -= loop_unroll_count;\n }\n\n while (n >= 1) {\n uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n ((size_t)s[0] * 4));\n wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);\n\n s += 1 * 1;\n d += 1 * 3;\n n -= 1;\n }\n\n return len;\n}\n\nstatic uint64_t //\nwuffs_base__" +
- "pixel_swizzler__xxx__index_binary_alpha__src_over(\n wuffs_base__slice_u8 dst,\n wuffs_base__slice_u8 dst_palette,\n wuffs_base__slice_u8 src) {\n if (dst_palette.len != 1024) {\n return 0;\n }\n size_t dst_len3 = dst.len / 3;\n size_t len = dst_len3 < src.len ? dst_len3 : src.len;\n uint8_t* d = dst.ptr;\n uint8_t* s = src.ptr;\n size_t n = len;\n\n const size_t loop_unroll_count = 4;\n\n while (n >= loop_unroll_count) {\n uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n ((size_t)s[0] * 4));\n if (s0) {\n wuffs_base__store_u24le__no_bounds_check(d + (0 * 4), s0);\n }\n uint32_t s1 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n ((size_t)s[1] * 4));\n if (s1) {\n wuffs_base__store_u24le__no_bounds_check(d + (1 * 4), s1);\n }\n uint32_t s2 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n " +
- " ((size_t)s[2] * 4));\n if (s2) {\n wuffs_base__store_u24le__no_bounds_check(d + (2 * 4), s2);\n }\n uint32_t s3 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n ((size_t)s[3] * 4));\n if (s3) {\n wuffs_base__store_u24le__no_bounds_check(d + (3 * 4), s3);\n }\n\n s += loop_unroll_count * 1;\n d += loop_unroll_count * 3;\n n -= loop_unroll_count;\n }\n\n while (n >= 1) {\n uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n ((size_t)s[0] * 4));\n if (s0) {\n wuffs_base__store_u24le__no_bounds_check(d + (0 * 4), s0);\n }\n\n s += 1 * 1;\n d += 1 * 3;\n n -= 1;\n }\n\n return len;\n}\n\n" +
+ "pixel_swizzler__xxx__index_binary_alpha__src_over(\n wuffs_base__slice_u8 dst,\n wuffs_base__slice_u8 dst_palette,\n wuffs_base__slice_u8 src) {\n if (dst_palette.len != 1024) {\n return 0;\n }\n size_t dst_len3 = dst.len / 3;\n size_t len = dst_len3 < src.len ? dst_len3 : src.len;\n uint8_t* d = dst.ptr;\n uint8_t* s = src.ptr;\n size_t n = len;\n\n const size_t loop_unroll_count = 4;\n\n while (n >= loop_unroll_count) {\n uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n ((size_t)s[0] * 4));\n if (s0) {\n wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);\n }\n uint32_t s1 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n ((size_t)s[1] * 4));\n if (s1) {\n wuffs_base__store_u24le__no_bounds_check(d + (1 * 3), s1);\n }\n uint32_t s2 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n " +
+ " ((size_t)s[2] * 4));\n if (s2) {\n wuffs_base__store_u24le__no_bounds_check(d + (2 * 3), s2);\n }\n uint32_t s3 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n ((size_t)s[3] * 4));\n if (s3) {\n wuffs_base__store_u24le__no_bounds_check(d + (3 * 3), s3);\n }\n\n s += loop_unroll_count * 1;\n d += loop_unroll_count * 3;\n n -= loop_unroll_count;\n }\n\n while (n >= 1) {\n uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n ((size_t)s[0] * 4));\n if (s0) {\n wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);\n }\n\n s += 1 * 1;\n d += 1 * 3;\n n -= 1;\n }\n\n return len;\n}\n\n" +
"" +
"// --------\n\nstatic uint64_t //\nwuffs_base__pixel_swizzler__xxxx__index__src(wuffs_base__slice_u8 dst,\n wuffs_base__slice_u8 dst_palette,\n wuffs_base__slice_u8 src) {\n if (dst_palette.len != 1024) {\n return 0;\n }\n size_t dst_len4 = dst.len / 4;\n size_t len = dst_len4 < src.len ? dst_len4 : src.len;\n uint8_t* d = dst.ptr;\n uint8_t* s = src.ptr;\n size_t n = len;\n\n const size_t loop_unroll_count = 4;\n\n while (n >= loop_unroll_count) {\n wuffs_base__store_u32le__no_bounds_check(\n d + (0 * 4), wuffs_base__load_u32le__no_bounds_check(\n dst_palette.ptr + ((size_t)s[0] * 4)));\n wuffs_base__store_u32le__no_bounds_check(\n d + (1 * 4), wuffs_base__load_u32le__no_bounds_check(\n dst_palette.ptr + ((size_t)s[1] * 4)));\n wuffs_base__store_u32le__no_bounds_check(\n d + (2 * 4), wuffs_base__load_u32le__no_bounds_check(\n dst_pale" +
"tte.ptr + ((size_t)s[2] * 4)));\n wuffs_base__store_u32le__no_bounds_check(\n d + (3 * 4), wuffs_base__load_u32le__no_bounds_check(\n dst_palette.ptr + ((size_t)s[3] * 4)));\n\n s += loop_unroll_count * 1;\n d += loop_unroll_count * 4;\n n -= loop_unroll_count;\n }\n\n while (n >= 1) {\n wuffs_base__store_u32le__no_bounds_check(\n d + (0 * 4), wuffs_base__load_u32le__no_bounds_check(\n dst_palette.ptr + ((size_t)s[0] * 4)));\n\n s += 1 * 1;\n d += 1 * 4;\n n -= 1;\n }\n\n return len;\n}\n\nstatic uint64_t //\nwuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over(\n wuffs_base__slice_u8 dst,\n wuffs_base__slice_u8 dst_palette,\n wuffs_base__slice_u8 src) {\n if (dst_palette.len != 1024) {\n return 0;\n }\n size_t dst_len4 = dst.len / 4;\n size_t len = dst_len4 < src.len ? dst_len4 : src.len;\n uint8_t* d = dst.ptr;\n uint8_t* s = src.ptr;\n size_t n = len;\n\n const size_t loop_unroll_count = 4;\n\n while (n >= loop_unroll_count)" +
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 5e0fe7e..73d07eb 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -9183,22 +9183,22 @@
uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
((size_t)s[0] * 4));
if (s0) {
- wuffs_base__store_u24le__no_bounds_check(d + (0 * 4), s0);
+ wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);
}
uint32_t s1 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
((size_t)s[1] * 4));
if (s1) {
- wuffs_base__store_u24le__no_bounds_check(d + (1 * 4), s1);
+ wuffs_base__store_u24le__no_bounds_check(d + (1 * 3), s1);
}
uint32_t s2 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
((size_t)s[2] * 4));
if (s2) {
- wuffs_base__store_u24le__no_bounds_check(d + (2 * 4), s2);
+ wuffs_base__store_u24le__no_bounds_check(d + (2 * 3), s2);
}
uint32_t s3 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
((size_t)s[3] * 4));
if (s3) {
- wuffs_base__store_u24le__no_bounds_check(d + (3 * 4), s3);
+ wuffs_base__store_u24le__no_bounds_check(d + (3 * 3), s3);
}
s += loop_unroll_count * 1;
@@ -9210,7 +9210,7 @@
uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
((size_t)s[0] * 4));
if (s0) {
- wuffs_base__store_u24le__no_bounds_check(d + (0 * 4), s0);
+ wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);
}
s += 1 * 1;