Don't unroll pixel_swizzler 4x8 4x16le conversion
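Removing the hand-written 4x unrolled loop shrinks the object files, most
noticeably under gcc.
Binary size (in bytes), before: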
18712 gen/lib/c/clang-9-dynamic/wuffs-base-pixconv.lo
19392 gen/lib/c/clang-9-static/wuffs-base-pixconv.o
36880 gen/lib/c/gcc-dynamic/wuffs-base-pixconv.lo
36824 gen/lib/c/gcc-static/wuffs-base-pixconv.o
After:
18520 gen/lib/c/clang-9-dynamic/wuffs-base-pixconv.lo
19200 gen/lib/c/clang-9-static/wuffs-base-pixconv.o
30256 gen/lib/c/gcc-dynamic/wuffs-base-pixconv.lo
30192 gen/lib/c/gcc-static/wuffs-base-pixconv.o
diff --git a/internal/cgen/base/pixconv-submodule.c b/internal/cgen/base/pixconv-submodule.c
index edcd535..2cf92de 100644
--- a/internal/cgen/base/pixconv-submodule.c
+++ b/internal/cgen/base/pixconv-submodule.c
@@ -540,28 +540,7 @@
uint8_t* d = dst_ptr;
const uint8_t* s = src_ptr;
- const size_t loop_unroll_count = 4;
-
size_t n = len;
- while (n >= loop_unroll_count) {
- wuffs_base__store_u32le__no_bounds_check(
- d + (0 * 4), wuffs_base__color_u64__as__color_u32(
- wuffs_base__load_u64le__no_bounds_check(s + (0 * 8))));
- wuffs_base__store_u32le__no_bounds_check(
- d + (1 * 4), wuffs_base__color_u64__as__color_u32(
- wuffs_base__load_u64le__no_bounds_check(s + (1 * 8))));
- wuffs_base__store_u32le__no_bounds_check(
- d + (2 * 4), wuffs_base__color_u64__as__color_u32(
- wuffs_base__load_u64le__no_bounds_check(s + (2 * 8))));
- wuffs_base__store_u32le__no_bounds_check(
- d + (3 * 4), wuffs_base__color_u64__as__color_u32(
- wuffs_base__load_u64le__no_bounds_check(s + (3 * 8))));
-
- s += loop_unroll_count * 8;
- d += loop_unroll_count * 4;
- n -= loop_unroll_count;
- }
-
while (n >= 1) {
wuffs_base__store_u32le__no_bounds_check(
d + (0 * 4), wuffs_base__color_u64__as__color_u32(
diff --git a/internal/cgen/data/data.go b/internal/cgen/data/data.go
index a113893..41b54ba 100644
--- a/internal/cgen/data/data.go
+++ b/internal/cgen/data/data.go
@@ -540,8 +540,7 @@
"// --------\n\nstatic uint64_t //\nwuffs_base__pixel_swizzler__squash_align4_bgr_565_888(\n wuffs_base__slice_u8 dst,\n wuffs_base__slice_u8 src) {\n size_t len = (dst.len < src.len ? dst.len : src.len) / 4;\n uint8_t* d = dst.ptr;\n const uint8_t* s = src.ptr;\n\n size_t n = len;\n while (n--) {\n uint32_t argb = wuffs_base__load_u32le__no_bounds_check(s);\n uint32_t b5 = 0x1F & (argb >> (8 - 5));\n uint32_t g6 = 0x3F & (argb >> (16 - 6));\n uint32_t r5 = 0x1F & (argb >> (24 - 5));\n uint32_t alpha = argb & 0xFF000000;\n wuffs_base__store_u32le__no_bounds_check(\n d, alpha | (r5 << 11) | (g6 << 5) | (b5 << 0));\n s += 4;\n d += 4;\n }\n return len;\n}\n\nstatic uint64_t //\nwuffs_base__pixel_swizzler__swap_rgbx_bgrx(wuffs_base__slice_u8 dst,\n wuffs_base__slice_u8 src) {\n size_t len = (dst.len < src.len ? dst.len : src.len) / 4;\n uint8_t* d = dst.ptr;\n const uint8_t* s = src.ptr;\n\n size_t n = len;\n while (n--) {\n uint8_t b0 = s[0];\n uint" +
"8_t b1 = s[1];\n uint8_t b2 = s[2];\n uint8_t b3 = s[3];\n d[0] = b2;\n d[1] = b1;\n d[2] = b0;\n d[3] = b3;\n s += 4;\n d += 4;\n }\n return len;\n}\n\n" +
"" +
- "// --------\n\nstatic uint64_t //\nwuffs_base__pixel_swizzler__squash_tight_4x8_4x16le(uint8_t* dst_ptr,\n size_t dst_len,\n uint8_t* dst_palette_ptr,\n size_t dst_palette_len,\n const uint8_t* src_ptr,\n size_t src_len) {\n size_t dst_len4 = dst_len / 4;\n size_t src_len8 = src_len / 8;\n size_t len = (dst_len4 < src_len8) ? dst_len4 : src_len8;\n uint8_t* d = dst_ptr;\n const uint8_t* s = src_ptr;\n\n const size_t loop_unroll_count = 4;\n\n size_t n = len;\n while (n >= loop_unroll_count) {\n wuffs_base__store_u32le__no_bounds_check(\n d + (0 * 4), wuffs_base__color_u64__as__color_u32(\n wuffs_base__load_u64le__no_bounds_check(s + (0 * 8))));\n wuffs_base__store_u32le__no_bounds_check(\n d + (1 * 4), wuffs_base__color_u64__as__" +
- "color_u32(\n wuffs_base__load_u64le__no_bounds_check(s + (1 * 8))));\n wuffs_base__store_u32le__no_bounds_check(\n d + (2 * 4), wuffs_base__color_u64__as__color_u32(\n wuffs_base__load_u64le__no_bounds_check(s + (2 * 8))));\n wuffs_base__store_u32le__no_bounds_check(\n d + (3 * 4), wuffs_base__color_u64__as__color_u32(\n wuffs_base__load_u64le__no_bounds_check(s + (3 * 8))));\n\n s += loop_unroll_count * 8;\n d += loop_unroll_count * 4;\n n -= loop_unroll_count;\n }\n\n while (n >= 1) {\n wuffs_base__store_u32le__no_bounds_check(\n d + (0 * 4), wuffs_base__color_u64__as__color_u32(\n wuffs_base__load_u64le__no_bounds_check(s + (0 * 8))));\n\n s += 1 * 8;\n d += 1 * 4;\n n -= 1;\n }\n return len;\n}\n\n" +
+ "// --------\n\nstatic uint64_t //\nwuffs_base__pixel_swizzler__squash_tight_4x8_4x16le(uint8_t* dst_ptr,\n size_t dst_len,\n uint8_t* dst_palette_ptr,\n size_t dst_palette_len,\n const uint8_t* src_ptr,\n size_t src_len) {\n size_t dst_len4 = dst_len / 4;\n size_t src_len8 = src_len / 8;\n size_t len = (dst_len4 < src_len8) ? dst_len4 : src_len8;\n uint8_t* d = dst_ptr;\n const uint8_t* s = src_ptr;\n\n size_t n = len;\n while (n >= 1) {\n wuffs_base__store_u32le__no_bounds_check(\n d + (0 * 4), wuffs_base__color_u64__as__color_u32(\n wuffs_base__load_u64le__no_bounds_check(s + (0 * 8))));\n\n s += 1 * 8;\n d += 1 * 4;\n n -= 1;\n }\n return len;\n}\n\n" +
"" +
"// --------\n\nstatic uint64_t //\nwuffs_base__pixel_swizzler__copy_1_1(uint8_t* dst_ptr,\n size_t dst_len,\n uint8_t* dst_palette_ptr,\n size_t dst_palette_len,\n const uint8_t* src_ptr,\n size_t src_len) {\n size_t len = (dst_len < src_len) ? dst_len : src_len;\n if (len > 0) {\n memmove(dst_ptr, src_ptr, len);\n }\n return len;\n}\n\nstatic uint64_t //\nwuffs_base__pixel_swizzler__copy_3_3(uint8_t* dst_ptr,\n size_t dst_len,\n uint8_t* dst_palette_ptr,\n size_t dst_palette_len,\n const uint8_t* src_ptr,\n size_t src_len) {\n size_t dst_len3 = dst_len / 3;\n size_t src_len3 = src_len / 3;\n size_t len = (dst_len3 < src_len3) ? dst_len3 : src_len3;\n if (len > 0) {\n " +
" memmove(dst_ptr, src_ptr, len * 3);\n }\n return len;\n}\n\nstatic uint64_t //\nwuffs_base__pixel_swizzler__copy_4_4(uint8_t* dst_ptr,\n size_t dst_len,\n uint8_t* dst_palette_ptr,\n size_t dst_palette_len,\n const uint8_t* src_ptr,\n size_t src_len) {\n size_t dst_len4 = dst_len / 4;\n size_t src_len4 = src_len / 4;\n size_t len = (dst_len4 < src_len4) ? dst_len4 : src_len4;\n if (len > 0) {\n memmove(dst_ptr, src_ptr, len * 4);\n }\n return len;\n}\n\n" +
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 3987662..1fc900e 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -13996,28 +13996,7 @@
uint8_t* d = dst_ptr;
const uint8_t* s = src_ptr;
- const size_t loop_unroll_count = 4;
-
size_t n = len;
- while (n >= loop_unroll_count) {
- wuffs_base__store_u32le__no_bounds_check(
- d + (0 * 4), wuffs_base__color_u64__as__color_u32(
- wuffs_base__load_u64le__no_bounds_check(s + (0 * 8))));
- wuffs_base__store_u32le__no_bounds_check(
- d + (1 * 4), wuffs_base__color_u64__as__color_u32(
- wuffs_base__load_u64le__no_bounds_check(s + (1 * 8))));
- wuffs_base__store_u32le__no_bounds_check(
- d + (2 * 4), wuffs_base__color_u64__as__color_u32(
- wuffs_base__load_u64le__no_bounds_check(s + (2 * 8))));
- wuffs_base__store_u32le__no_bounds_check(
- d + (3 * 4), wuffs_base__color_u64__as__color_u32(
- wuffs_base__load_u64le__no_bounds_check(s + (3 * 8))));
-
- s += loop_unroll_count * 8;
- d += loop_unroll_count * 4;
- n -= loop_unroll_count;
- }
-
while (n >= 1) {
wuffs_base__store_u32le__no_bounds_check(
d + (0 * 4), wuffs_base__color_u64__as__color_u32(