std/jpeg: prepare for triangle-filter upsampling
diff --git a/fuzz/c/std/pixel_swizzler_fuzzer.c b/fuzz/c/std/pixel_swizzler_fuzzer.c
index aafe60a..a73a462 100644
--- a/fuzz/c/std/pixel_swizzler_fuzzer.c
+++ b/fuzz/c/std/pixel_swizzler_fuzzer.c
@@ -234,7 +234,7 @@
uint32_t v0 = possible_hv_values[allow_hv3][3 & (hash >> 31)];
uint32_t v1 = possible_hv_values[allow_hv3][3 & (hash >> 33)];
uint32_t v2 = possible_hv_values[allow_hv3][3 & (hash >> 35)];
- // TODO: spend a hash bit for triangle_filter_for_2to1.
+ bool triangle_filter_for_2to1 = 1 & (hash >> 37);
uint32_t width0 = 8 * width_in_mcus * h0;
uint32_t width1 = 8 * width_in_mcus * h1;
@@ -339,6 +339,7 @@
wuffs_base__slice_u8 src3 = wuffs_base__empty_slice_u8();
wuffs_base__pixel_swizzler swizzler = {0};
+ uint8_t scratch_buffer[2048];
wuffs_base__status status = wuffs_base__pixel_swizzler__swizzle_ycck(
&swizzler, &dst_pixbuf, dst_palette, //
width, height, //
@@ -348,7 +349,8 @@
width0, width1, width2, 0, //
h0, h1, h2, 0, //
v0, v1, v2, 0, //
- false);
+ triangle_filter_for_2to1, //
+ wuffs_base__make_slice_u8(scratch_buffer, sizeof(scratch_buffer)));
if (status.repr) {
return wuffs_base__status__message(&status);
}
diff --git a/internal/cgen/base/image-private.h b/internal/cgen/base/image-private.h
index 7d3f01d..83807ae 100644
--- a/internal/cgen/base/image-private.h
+++ b/internal/cgen/base/image-private.h
@@ -39,36 +39,38 @@
uint64_t num_pixels);
WUFFS_BASE__MAYBE_STATIC wuffs_base__status //
-wuffs_base__pixel_swizzler__swizzle_ycck(const wuffs_base__pixel_swizzler* p,
- wuffs_base__pixel_buffer* dst,
- wuffs_base__slice_u8 dst_palette,
- uint32_t width,
- uint32_t height,
- wuffs_base__slice_u8 src0,
- wuffs_base__slice_u8 src1,
- wuffs_base__slice_u8 src2,
- wuffs_base__slice_u8 src3,
- uint32_t width0,
- uint32_t width1,
- uint32_t width2,
- uint32_t width3,
- uint32_t height0,
- uint32_t height1,
- uint32_t height2,
- uint32_t height3,
- uint32_t stride0,
- uint32_t stride1,
- uint32_t stride2,
- uint32_t stride3,
- uint8_t h0,
- uint8_t h1,
- uint8_t h2,
- uint8_t h3,
- uint8_t v0,
- uint8_t v1,
- uint8_t v2,
- uint8_t v3,
- bool triangle_filter_for_2to1);
+wuffs_base__pixel_swizzler__swizzle_ycck(
+ const wuffs_base__pixel_swizzler* p,
+ wuffs_base__pixel_buffer* dst,
+ wuffs_base__slice_u8 dst_palette,
+ uint32_t width,
+ uint32_t height,
+ wuffs_base__slice_u8 src0,
+ wuffs_base__slice_u8 src1,
+ wuffs_base__slice_u8 src2,
+ wuffs_base__slice_u8 src3,
+ uint32_t width0,
+ uint32_t width1,
+ uint32_t width2,
+ uint32_t width3,
+ uint32_t height0,
+ uint32_t height1,
+ uint32_t height2,
+ uint32_t height3,
+ uint32_t stride0,
+ uint32_t stride1,
+ uint32_t stride2,
+ uint32_t stride3,
+ uint8_t h0,
+ uint8_t h1,
+ uint8_t h2,
+ uint8_t h3,
+ uint8_t v0,
+ uint8_t v1,
+ uint8_t v2,
+ uint8_t v3,
+ bool triangle_filter_for_2to1,
+ wuffs_base__slice_u8 scratch_buffer_2k);
// ---------------- Images (Utility)
diff --git a/internal/cgen/base/pixconv-submodule-ycck.c b/internal/cgen/base/pixconv-submodule-ycck.c
index 6705692..004b1e2 100644
--- a/internal/cgen/base/pixconv-submodule-ycck.c
+++ b/internal/cgen/base/pixconv-submodule-ycck.c
@@ -34,14 +34,241 @@
e);
}
-// Preconditions: see all the checks made in
-// wuffs_base__pixel_swizzler__swizzle_ycck before calling this function. For
+// --------
+
+// wuffs_base__pixel_swizzler__swizzle_ycc__upsample_func upsamples to a
+// destination slice that is at least 480 (YCCK) or 672 (YCC) bytes long. The
+// src_len (multiplied by 1, 2, 3 or 4) is positive but no more than that. This
+// 480 or 672 length is just under 1/4 or 1/3 of the scratch_buffer_2k slice
+// length. Both (480 * 4) = 1920 and (672 * 3) = 2016 are less than 2048.
+//
+// 480 and 672 are nice round numbers because a JPEG MCU is 1, 2, 3 or 4 blocks
+// wide and each block is 8 pixels wide. We have:
+// 480 = 1 * 8 * 60, 672 = 1 * 8 * 84
+// 480 = 2 * 8 * 30, 672 = 2 * 8 * 42
+// 480 = 3 * 8 * 20, 672 = 3 * 8 * 28
+// 480 = 4 * 8 * 15, 672 = 4 * 8 * 21
+//
+// Box filters are equivalent to nearest neighbor upsampling. These ignore the
+// src_ptr_minor, h1v2_bias, first_column and last_column arguments.
+//
+// TODO: triangle filters.
+typedef const uint8_t* (  // Returns the upsampled bytes (maybe not dst_ptr).
+ *wuffs_base__pixel_swizzler__swizzle_ycc__upsample_func)(
+ uint8_t* dst_ptr,  // Scratch space that the upsampled bytes can go in.
+ const uint8_t* src_ptr_major, // Nearest row.
+ const uint8_t* src_ptr_minor, // Adjacent row, alternating above or below.
+ size_t src_len,  // Number of source bytes to upsample.
+ uint32_t h1v2_bias,
+ bool first_column,
+ bool last_column);
+
+static const uint8_t* // Box upsampler for inv_h == 1: no horizontal scaling.
+wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h1vn_box(
+ uint8_t* dst_ptr,  // Never written to by this function.
+ const uint8_t* src_ptr_major,
+ const uint8_t* src_ptr_minor_ignored,
+ size_t src_len,  // Not used here: nothing is copied.
+ uint32_t h1v2_bias_ignored,
+ bool first_column_ignored,
+ bool last_column_ignored) {
+ return src_ptr_major;  // The source row is returned as is, not dst_ptr.
+}
+
+static const uint8_t* // Box upsampler for inv_h == 2: doubles each byte.
+wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h2vn_box(
+ uint8_t* dst_ptr,  // Receives (2 * src_len) bytes.
+ const uint8_t* src_ptr_major,  // Supplies src_len bytes.
+ const uint8_t* src_ptr_minor_ignored,
+ size_t src_len,
+ uint32_t h1v2_bias_ignored,
+ bool first_column_ignored,
+ bool last_column_ignored) {
+ uint8_t* dp = dst_ptr;
+ const uint8_t* sp = src_ptr_major;
+ while (src_len--) {  // One iteration per source byte.
+ uint8_t sv = *sp++;
+ *dp++ = sv;
+ *dp++ = sv;
+ }
+ return dst_ptr;  // The start of the bytes written above.
+}
+
+static const uint8_t* // Box upsampler for inv_h == 3: triples each byte.
+wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h3vn_box(
+ uint8_t* dst_ptr,  // Receives (3 * src_len) bytes.
+ const uint8_t* src_ptr_major,  // Supplies src_len bytes.
+ const uint8_t* src_ptr_minor_ignored,
+ size_t src_len,
+ uint32_t h1v2_bias_ignored,
+ bool first_column_ignored,
+ bool last_column_ignored) {
+ uint8_t* dp = dst_ptr;
+ const uint8_t* sp = src_ptr_major;
+ while (src_len--) {  // One iteration per source byte.
+ uint8_t sv = *sp++;
+ *dp++ = sv;
+ *dp++ = sv;
+ *dp++ = sv;
+ }
+ return dst_ptr;  // The start of the bytes written above.
+}
+
+static const uint8_t* // Box upsampler for inv_h == 4: quadruples each byte.
+wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h4vn_box(
+ uint8_t* dst_ptr,  // Receives (4 * src_len) bytes.
+ const uint8_t* src_ptr_major,  // Supplies src_len bytes.
+ const uint8_t* src_ptr_minor_ignored,
+ size_t src_len,
+ uint32_t h1v2_bias_ignored,
+ bool first_column_ignored,
+ bool last_column_ignored) {
+ uint8_t* dp = dst_ptr;
+ const uint8_t* sp = src_ptr_major;
+ while (src_len--) {  // One iteration per source byte.
+ uint8_t sv = *sp++;
+ *dp++ = sv;
+ *dp++ = sv;
+ *dp++ = sv;
+ *dp++ = sv;
+ }
+ return dst_ptr;  // The start of the bytes written above.
+}
+
+// wuffs_base__pixel_swizzler__swizzle_ycc__upsample_funcs is indexed by
+// (inv_h - 1) and then (inv_v - 1). Every entry is currently a box filter.
+static const wuffs_base__pixel_swizzler__swizzle_ycc__upsample_func
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_funcs[4][4] = {
+ {  // inv_h == 1.
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h1vn_box,
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h1vn_box,
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h1vn_box,
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h1vn_box,
+ },
+ {  // inv_h == 2.
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h2vn_box,
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h2vn_box,
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h2vn_box,
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h2vn_box,
+ },
+ {  // inv_h == 3.
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h3vn_box,
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h3vn_box,
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h3vn_box,
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h3vn_box,
+ },
+ {  // inv_h == 4.
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h4vn_box,
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h4vn_box,
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h4vn_box,
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h4vn_box,
+ },
+};
+
+static inline uint32_t // Predicate: the result is only ever 1 or 0.
+wuffs_base__pixel_swizzler__has_triangle_upsampler(uint32_t inv_h,
+ uint32_t inv_v) {
+ if (inv_h == 1u) {
+ return inv_v == 2u;  // True for h1v2 only.
+ } else if (inv_h == 2u) {
+ return (inv_v == 1u) || (inv_v == 2u);  // True for h2v1 and h2v2.
+ }
+ return false;  // NOTE(review): bool may be a better return type.
+}
+
+// --------
+
+// All of the wuffs_base__pixel_swizzler__swizzle_ycc__etc functions have
+// preconditions. See all of the checks made in
+// wuffs_base__pixel_swizzler__swizzle_ycck before calling these functions. For
// example, (width > 0) is a precondition, but there are many more.
+
static void //
-wuffs_base__pixel_swizzler__swizzle_ycc__general__box_filter(
- const wuffs_base__pixel_swizzler* p,
+wuffs_base__pixel_swizzler__swizzle_ycc__general__triangle_filter_single_row(
wuffs_base__pixel_buffer* dst,
- wuffs_base__slice_u8 dst_palette,
+ uint32_t width,  // Number of destination pixels in the row.
+ uint32_t y,  // The single destination row that this call fills in.
+ const uint8_t* src_ptr0,
+ const uint8_t* src_ptr1,
+ const uint8_t* src_ptr2,
+ uint32_t stride0,
+ uint32_t stride1,
+ uint32_t stride2,
+ uint32_t inv_h0,
+ uint32_t inv_h1,
+ uint32_t inv_h2,
+ uint32_t inv_v0,
+ uint32_t inv_v1,
+ uint32_t inv_v2,
+ uint32_t half_width_for_2to1,  // Threshold for the last_column argument.
+ uint32_t h1v2_bias,  // Forwarded, unmodified, to each upfunc.
+ uint8_t* scratch_buffer_2k_ptr,  // Used at offsets 0, 672 and 1344.
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_func upfunc0,
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_func upfunc1,
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_func upfunc2) {
+ const uint8_t* src0 = src_ptr0 + ((y / inv_v0) * (size_t)stride0);
+ const uint8_t* src1 = src_ptr1 + ((y / inv_v1) * (size_t)stride1);
+ const uint8_t* src2 = src_ptr2 + ((y / inv_v2) * (size_t)stride2);
+ uint32_t total_src_len0 = 0u;  // Running sum of src_len0 over all chunks.
+ uint32_t total_src_len1 = 0u;
+ uint32_t total_src_len2 = 0u;
+
+ uint32_t x = 0u;
+ while (x < width) {  // Each iteration handles up to 672 pixels.
+ bool first_column = x == 0u;  // True only for the leftmost chunk.
+ uint32_t end = x + 672u;
+ if (end > width) {
+ end = width;  // The final chunk can be shorter.
+ }
+
+ uint32_t src_len0 = ((end - x) + inv_h0 - 1u) / inv_h0;  // Rounds up.
+ uint32_t src_len1 = ((end - x) + inv_h1 - 1u) / inv_h1;
+ uint32_t src_len2 = ((end - x) + inv_h2 - 1u) / inv_h2;
+ total_src_len0 += src_len0;
+ total_src_len1 += src_len1;
+ total_src_len2 += src_len2;
+
+ const uint8_t* src_ptr_x0 = src0 + (x / inv_h0);
+ const uint8_t* up0 = (*upfunc0)( //
+ scratch_buffer_2k_ptr + (0u * 672u), // dst_ptr.
+ src_ptr_x0, // src_ptr_major.
+ src_ptr_x0, // src_ptr_minor (the same pointer as major).
+ src_len0, //
+ h1v2_bias, //
+ first_column, //
+ (total_src_len0 >= half_width_for_2to1));  // last_column.
+
+ const uint8_t* src_ptr_x1 = src1 + (x / inv_h1);
+ const uint8_t* up1 = (*upfunc1)( //
+ scratch_buffer_2k_ptr + (1u * 672u), // dst_ptr.
+ src_ptr_x1, // src_ptr_major.
+ src_ptr_x1, // src_ptr_minor (the same pointer as major).
+ src_len1, //
+ h1v2_bias, //
+ first_column, //
+ (total_src_len1 >= half_width_for_2to1));  // last_column.
+
+ const uint8_t* src_ptr_x2 = src2 + (x / inv_h2);
+ const uint8_t* up2 = (*upfunc2)( //
+ scratch_buffer_2k_ptr + (2u * 672u), // dst_ptr.
+ src_ptr_x2, // src_ptr_major.
+ src_ptr_x2, // src_ptr_minor (the same pointer as major).
+ src_len2, //
+ h1v2_bias, //
+ first_column, //
+ (total_src_len2 >= half_width_for_2to1));  // last_column.
+
+ for (; x < end; x++) {  // Also advances x to the start of the next chunk.
+ wuffs_base__pixel_buffer__set_color_u32_at(
+ dst, x, y,
+ wuffs_base__color_ycc__as__color_u32(*up0++, *up1++, *up2++));
+ }
+ }
+}
+
+static void //
+wuffs_base__pixel_swizzler__swizzle_ycc__general__triangle_filter(
+ wuffs_base__pixel_buffer* dst,
uint32_t width,
uint32_t height,
const uint8_t* src_ptr0,
@@ -50,74 +277,129 @@
uint32_t stride0,
uint32_t stride1,
uint32_t stride2,
- uint32_t h0_out_of_12,
- uint32_t h1_out_of_12,
- uint32_t h2_out_of_12,
- uint32_t v0_out_of_12,
- uint32_t v1_out_of_12,
- uint32_t v2_out_of_12) {
- uint32_t iy0 = 0;
- uint32_t iy1 = 0;
- uint32_t iy2 = 0;
- uint32_t y = 0;
+ uint32_t inv_h0,
+ uint32_t inv_h1,
+ uint32_t inv_h2,
+ uint32_t inv_v0,
+ uint32_t inv_v1,
+ uint32_t inv_v2,
+ uint32_t half_width_for_2to1,
+ uint32_t half_height_for_2to1,
+ uint8_t* scratch_buffer_2k_ptr) {
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_func upfunc0 =
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_funcs
+ [(inv_h0 - 1u) & 3u][(inv_v0 - 1u) & 3u];
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_func upfunc1 =
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_funcs
+ [(inv_h1 - 1u) & 3u][(inv_v1 - 1u) & 3u];
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_func upfunc2 =
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_funcs
+ [(inv_h2 - 1u) & 3u][(inv_v2 - 1u) & 3u];
+
+ uint32_t y;
+ for (y = 0u; y < height; y++) {
+ wuffs_base__pixel_swizzler__swizzle_ycc__general__triangle_filter_single_row(
+ dst, width, y, //
+ src_ptr0, src_ptr1, src_ptr2, //
+ stride0, stride1, stride2, //
+ inv_h0, inv_h1, inv_h2, //
+ inv_v0, inv_v1, inv_v2, //
+ half_width_for_2to1, //
+ 0u, //
+ scratch_buffer_2k_ptr, //
+ upfunc0, upfunc1, upfunc2);
+ }
+}
+
+static void //
+wuffs_base__pixel_swizzler__swizzle_ycc__general__box_filter(
+ wuffs_base__pixel_buffer* dst,
+ uint32_t width,
+ uint32_t height,
+ const uint8_t* src_ptr0,
+ const uint8_t* src_ptr1,
+ const uint8_t* src_ptr2,
+ uint32_t stride0,
+ uint32_t stride1,
+ uint32_t stride2,
+ uint32_t inv_h0,
+ uint32_t inv_h1,
+ uint32_t inv_h2,
+ uint32_t inv_v0,
+ uint32_t inv_v1,
+ uint32_t inv_v2) {
+ // Convert an inv_h or inv_v value from {1, 2, 3, 4} to {12, 6, 4, 3}.
+ uint32_t h0_out_of_12 = 12u / inv_h0;
+ uint32_t h1_out_of_12 = 12u / inv_h1;
+ uint32_t h2_out_of_12 = 12u / inv_h2;
+ uint32_t v0_out_of_12 = 12u / inv_v0;
+ uint32_t v1_out_of_12 = 12u / inv_v1;
+ uint32_t v2_out_of_12 = 12u / inv_v2;
+
+ uint32_t iy0 = 0u;
+ uint32_t iy1 = 0u;
+ uint32_t iy2 = 0u;
+ uint32_t y = 0u;
while (true) {
const uint8_t* src_iter0 = src_ptr0;
const uint8_t* src_iter1 = src_ptr1;
const uint8_t* src_iter2 = src_ptr2;
- uint32_t ix0 = 0;
- uint32_t ix1 = 0;
- uint32_t ix2 = 0;
- uint32_t x = 0;
+ uint32_t ix0 = 0u;
+ uint32_t ix1 = 0u;
+ uint32_t ix2 = 0u;
+ uint32_t x = 0u;
while (true) {
wuffs_base__pixel_buffer__set_color_u32_at(
dst, x, y,
wuffs_base__color_ycc__as__color_u32(*src_iter0, *src_iter1,
*src_iter2));
- if ((x + 1) == width) {
+ if ((x + 1u) == width) {
break;
}
- x = x + 1;
+ x = x + 1u;
ix0 += h0_out_of_12;
- if (ix0 >= 12) {
- ix0 = 0;
+ if (ix0 >= 12u) {
+ ix0 = 0u;
src_iter0++;
}
ix1 += h1_out_of_12;
- if (ix1 >= 12) {
- ix1 = 0;
+ if (ix1 >= 12u) {
+ ix1 = 0u;
src_iter1++;
}
ix2 += h2_out_of_12;
- if (ix2 >= 12) {
- ix2 = 0;
+ if (ix2 >= 12u) {
+ ix2 = 0u;
src_iter2++;
}
}
- if ((y + 1) == height) {
+ if ((y + 1u) == height) {
break;
}
- y = y + 1;
+ y = y + 1u;
iy0 += v0_out_of_12;
- if (iy0 >= 12) {
- iy0 = 0;
+ if (iy0 >= 12u) {
+ iy0 = 0u;
src_ptr0 += stride0;
}
iy1 += v1_out_of_12;
- if (iy1 >= 12) {
- iy1 = 0;
+ if (iy1 >= 12u) {
+ iy1 = 0u;
src_ptr1 += stride1;
}
iy2 += v2_out_of_12;
- if (iy2 >= 12) {
- iy2 = 0;
+ if (iy2 >= 12u) {
+ iy2 = 0u;
src_ptr2 += stride2;
}
}
}
+// --------
+
// wuffs_base__pixel_swizzler__flattened_length is like
// wuffs_base__table__flattened_length but returns uint64_t (not size_t) and
// also accounts for subsampling.
@@ -127,66 +409,78 @@
uint32_t stride,
uint32_t inv_h,
uint32_t inv_v) {
- uint64_t scaled_width = (((uint64_t)width) + (inv_h - 1)) / inv_h;
- uint64_t scaled_height = (((uint64_t)height) + (inv_v - 1)) / inv_v;
- if (scaled_height <= 0) {
- return 0;
+ uint64_t scaled_width = (((uint64_t)width) + (inv_h - 1u)) / inv_h;
+ uint64_t scaled_height = (((uint64_t)height) + (inv_v - 1u)) / inv_v;
+ if (scaled_height <= 0u) {
+ return 0u;
}
- return ((scaled_height - 1) * stride) + scaled_width;
+ return ((scaled_height - 1u) * stride) + scaled_width;
}
WUFFS_BASE__MAYBE_STATIC wuffs_base__status //
-wuffs_base__pixel_swizzler__swizzle_ycck(const wuffs_base__pixel_swizzler* p,
- wuffs_base__pixel_buffer* dst,
- wuffs_base__slice_u8 dst_palette,
- uint32_t width,
- uint32_t height,
- wuffs_base__slice_u8 src0,
- wuffs_base__slice_u8 src1,
- wuffs_base__slice_u8 src2,
- wuffs_base__slice_u8 src3,
- uint32_t width0,
- uint32_t width1,
- uint32_t width2,
- uint32_t width3,
- uint32_t height0,
- uint32_t height1,
- uint32_t height2,
- uint32_t height3,
- uint32_t stride0,
- uint32_t stride1,
- uint32_t stride2,
- uint32_t stride3,
- uint8_t h0,
- uint8_t h1,
- uint8_t h2,
- uint8_t h3,
- uint8_t v0,
- uint8_t v1,
- uint8_t v2,
- uint8_t v3,
- bool triangle_filter_for_2to1) {
+wuffs_base__pixel_swizzler__swizzle_ycck(
+ const wuffs_base__pixel_swizzler* p,
+ wuffs_base__pixel_buffer* dst,
+ wuffs_base__slice_u8 dst_palette,
+ uint32_t width,
+ uint32_t height,
+ wuffs_base__slice_u8 src0,
+ wuffs_base__slice_u8 src1,
+ wuffs_base__slice_u8 src2,
+ wuffs_base__slice_u8 src3,
+ uint32_t width0,
+ uint32_t width1,
+ uint32_t width2,
+ uint32_t width3,
+ uint32_t height0,
+ uint32_t height1,
+ uint32_t height2,
+ uint32_t height3,
+ uint32_t stride0,
+ uint32_t stride1,
+ uint32_t stride2,
+ uint32_t stride3,
+ uint8_t h0,
+ uint8_t h1,
+ uint8_t h2,
+ uint8_t h3,
+ uint8_t v0,
+ uint8_t v1,
+ uint8_t v2,
+ uint8_t v3,
+ bool triangle_filter_for_2to1,
+ wuffs_base__slice_u8 scratch_buffer_2k) {
if (!p) {
return wuffs_base__make_status(wuffs_base__error__bad_receiver);
- } else if ((h3 != 0) || (v3 != 0) || triangle_filter_for_2to1) {
- // TODO: support the K in YCCK and support triangle_filter_for_2to1.
+ } else if ((h3 != 0u) || (v3 != 0u)) {
+ // TODO: support the K in YCCK.
return wuffs_base__make_status(
wuffs_base__error__unsupported_pixel_swizzler_option);
- } else if (!dst || (width > 0xFFFF) || (height > 0xFFFF) || //
- (4 <= (h0 - 1)) || (4 <= (v0 - 1)) || //
- (4 <= (h1 - 1)) || (4 <= (v1 - 1)) || //
- (4 <= (h2 - 1)) || (4 <= (v2 - 1))) {
+ } else if (!dst || (width > 0xFFFFu) || (height > 0xFFFFu) || //
+ (4u <= (h0 - 1u)) || (4u <= (v0 - 1u)) || //
+ (4u <= (h1 - 1u)) || (4u <= (v1 - 1u)) || //
+ (4u <= (h2 - 1u)) || (4u <= (v2 - 1u)) || //
+ (scratch_buffer_2k.len < 2048u)) {
return wuffs_base__make_status(wuffs_base__error__bad_argument);
}
uint32_t max_incl_h = wuffs_base__u32__max_of_4(h0, h1, h2, h3);
uint32_t max_incl_v = wuffs_base__u32__max_of_4(v0, v1, v2, v3);
+
+ // Calculate the inverse h and v ratios.
+ //
+ // It also canonicalizes (h=2 and max_incl_h=4) as equivalent to (h=1 and
+ // max_incl_h=2). In both cases, the inv_h value is 2.
uint32_t inv_h0 = max_incl_h / h0;
uint32_t inv_h1 = max_incl_h / h1;
uint32_t inv_h2 = max_incl_h / h2;
uint32_t inv_v0 = max_incl_v / v0;
uint32_t inv_v1 = max_incl_v / v1;
uint32_t inv_v2 = max_incl_v / v2;
+
+ uint32_t half_width_for_2to1 = (width + 1u) / 2u;
+ uint32_t half_height_for_2to1 = (height + 1u) / 2u;
+
width = wuffs_base__u32__min_of_5( //
width, //
width0 * inv_h0, //
@@ -246,19 +540,31 @@
wuffs_base__error__unsupported_pixel_swizzler_option);
}
- if ((width <= 0) || (height <= 0)) {
+ if ((width <= 0u) || (height <= 0u)) {
return wuffs_base__make_status(NULL);
}
- wuffs_base__pixel_swizzler__swizzle_ycc__general__box_filter(
- p, dst, dst_palette, width, height, //
- src0.ptr, src1.ptr, src2.ptr, //
- stride0, stride1, stride2, //
- (h0 * 12) / max_incl_h, //
- (h1 * 12) / max_incl_h, //
- (h2 * 12) / max_incl_h, //
- (v0 * 12) / max_incl_v, //
- (v1 * 12) / max_incl_v, //
- (v2 * 12) / max_incl_v);
+ if (triangle_filter_for_2to1 &&
+ (wuffs_base__pixel_swizzler__has_triangle_upsampler(inv_h0, inv_v0) ||
+ wuffs_base__pixel_swizzler__has_triangle_upsampler(inv_h1, inv_v1) ||
+ wuffs_base__pixel_swizzler__has_triangle_upsampler(inv_h2, inv_v2))) {
+ wuffs_base__pixel_swizzler__swizzle_ycc__general__triangle_filter(
+ dst, width, height, //
+ src0.ptr, src1.ptr, src2.ptr, //
+ stride0, stride1, stride2, //
+ inv_h0, inv_h1, inv_h2, //
+ inv_v0, inv_v1, inv_v2, //
+ half_width_for_2to1, half_height_for_2to1, //
+ scratch_buffer_2k.ptr);
+
+ } else {
+ wuffs_base__pixel_swizzler__swizzle_ycc__general__box_filter(
+ dst, width, height, //
+ src0.ptr, src1.ptr, src2.ptr, //
+ stride0, stride1, stride2, //
+ inv_h0, inv_h1, inv_h2, //
+ inv_v0, inv_v1, inv_v2);
+ }
+
return wuffs_base__make_status(NULL);
}
diff --git a/lang/builtin/builtin.go b/lang/builtin/builtin.go
index d0c3215..fceef8a 100644
--- a/lang/builtin/builtin.go
+++ b/lang/builtin/builtin.go
@@ -672,7 +672,8 @@
"v1: u8[..= 4]," +
"v2: u8[..= 4]," +
"v3: u8[..= 4]," +
- "triangle_filter_for_2to1: bool) status",
+ "triangle_filter_for_2to1: bool," +
+ "scratch_buffer_2k: slice u8) status",
// ---- arm_crc32_utility
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index e667f57..af37bd6 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -12733,36 +12733,38 @@
uint64_t num_pixels);
WUFFS_BASE__MAYBE_STATIC wuffs_base__status //
-wuffs_base__pixel_swizzler__swizzle_ycck(const wuffs_base__pixel_swizzler* p,
- wuffs_base__pixel_buffer* dst,
- wuffs_base__slice_u8 dst_palette,
- uint32_t width,
- uint32_t height,
- wuffs_base__slice_u8 src0,
- wuffs_base__slice_u8 src1,
- wuffs_base__slice_u8 src2,
- wuffs_base__slice_u8 src3,
- uint32_t width0,
- uint32_t width1,
- uint32_t width2,
- uint32_t width3,
- uint32_t height0,
- uint32_t height1,
- uint32_t height2,
- uint32_t height3,
- uint32_t stride0,
- uint32_t stride1,
- uint32_t stride2,
- uint32_t stride3,
- uint8_t h0,
- uint8_t h1,
- uint8_t h2,
- uint8_t h3,
- uint8_t v0,
- uint8_t v1,
- uint8_t v2,
- uint8_t v3,
- bool triangle_filter_for_2to1);
+wuffs_base__pixel_swizzler__swizzle_ycck(
+ const wuffs_base__pixel_swizzler* p,
+ wuffs_base__pixel_buffer* dst,
+ wuffs_base__slice_u8 dst_palette,
+ uint32_t width,
+ uint32_t height,
+ wuffs_base__slice_u8 src0,
+ wuffs_base__slice_u8 src1,
+ wuffs_base__slice_u8 src2,
+ wuffs_base__slice_u8 src3,
+ uint32_t width0,
+ uint32_t width1,
+ uint32_t width2,
+ uint32_t width3,
+ uint32_t height0,
+ uint32_t height1,
+ uint32_t height2,
+ uint32_t height3,
+ uint32_t stride0,
+ uint32_t stride1,
+ uint32_t stride2,
+ uint32_t stride3,
+ uint8_t h0,
+ uint8_t h1,
+ uint8_t h2,
+ uint8_t h3,
+ uint8_t v0,
+ uint8_t v1,
+ uint8_t v2,
+ uint8_t v3,
+ bool triangle_filter_for_2to1,
+ wuffs_base__slice_u8 scratch_buffer_2k);
// ---------------- Images (Utility)
@@ -22680,14 +22682,241 @@
e);
}
-// Preconditions: see all the checks made in
-// wuffs_base__pixel_swizzler__swizzle_ycck before calling this function. For
+// --------
+
+// wuffs_base__pixel_swizzler__swizzle_ycc__upsample_func upsamples to a
+// destination slice that is at least 480 (YCCK) or 672 (YCC) bytes long. The
+// src_len (multiplied by 1, 2, 3 or 4) is positive but no more than that. This
+// 480 or 672 length is just under 1/4 or 1/3 of the scratch_buffer_2k slice
+// length. Both (480 * 4) = 1920 and (672 * 3) = 2016 are less than 2048.
+//
+// 480 and 672 are nice round numbers because a JPEG MCU is 1, 2, 3 or 4 blocks
+// wide and each block is 8 pixels wide. We have:
+// 480 = 1 * 8 * 60, 672 = 1 * 8 * 84
+// 480 = 2 * 8 * 30, 672 = 2 * 8 * 42
+// 480 = 3 * 8 * 20, 672 = 3 * 8 * 28
+// 480 = 4 * 8 * 15, 672 = 4 * 8 * 21
+//
+// Box filters are equivalent to nearest neighbor upsampling. These ignore the
+// src_ptr_minor, h1v2_bias, first_column and last_column arguments.
+//
+// TODO: triangle filters.
+typedef const uint8_t* (  // Returns the upsampled bytes (maybe not dst_ptr).
+ *wuffs_base__pixel_swizzler__swizzle_ycc__upsample_func)(
+ uint8_t* dst_ptr,  // Scratch space that the upsampled bytes can go in.
+ const uint8_t* src_ptr_major, // Nearest row.
+ const uint8_t* src_ptr_minor, // Adjacent row, alternating above or below.
+ size_t src_len,  // Number of source bytes to upsample.
+ uint32_t h1v2_bias,
+ bool first_column,
+ bool last_column);
+
+static const uint8_t* // Box upsampler for inv_h == 1: no horizontal scaling.
+wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h1vn_box(
+ uint8_t* dst_ptr,  // Never written to by this function.
+ const uint8_t* src_ptr_major,
+ const uint8_t* src_ptr_minor_ignored,
+ size_t src_len,  // Not used here: nothing is copied.
+ uint32_t h1v2_bias_ignored,
+ bool first_column_ignored,
+ bool last_column_ignored) {
+ return src_ptr_major;  // The source row is returned as is, not dst_ptr.
+}
+
+static const uint8_t* // Box upsampler for inv_h == 2: doubles each byte.
+wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h2vn_box(
+ uint8_t* dst_ptr,  // Receives (2 * src_len) bytes.
+ const uint8_t* src_ptr_major,  // Supplies src_len bytes.
+ const uint8_t* src_ptr_minor_ignored,
+ size_t src_len,
+ uint32_t h1v2_bias_ignored,
+ bool first_column_ignored,
+ bool last_column_ignored) {
+ uint8_t* dp = dst_ptr;
+ const uint8_t* sp = src_ptr_major;
+ while (src_len--) {  // One iteration per source byte.
+ uint8_t sv = *sp++;
+ *dp++ = sv;
+ *dp++ = sv;
+ }
+ return dst_ptr;  // The start of the bytes written above.
+}
+
+static const uint8_t* // Box upsampler for inv_h == 3: triples each byte.
+wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h3vn_box(
+ uint8_t* dst_ptr,  // Receives (3 * src_len) bytes.
+ const uint8_t* src_ptr_major,  // Supplies src_len bytes.
+ const uint8_t* src_ptr_minor_ignored,
+ size_t src_len,
+ uint32_t h1v2_bias_ignored,
+ bool first_column_ignored,
+ bool last_column_ignored) {
+ uint8_t* dp = dst_ptr;
+ const uint8_t* sp = src_ptr_major;
+ while (src_len--) {  // One iteration per source byte.
+ uint8_t sv = *sp++;
+ *dp++ = sv;
+ *dp++ = sv;
+ *dp++ = sv;
+ }
+ return dst_ptr;  // The start of the bytes written above.
+}
+
+static const uint8_t* // Box upsampler for inv_h == 4: quadruples each byte.
+wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h4vn_box(
+ uint8_t* dst_ptr,  // Receives (4 * src_len) bytes.
+ const uint8_t* src_ptr_major,  // Supplies src_len bytes.
+ const uint8_t* src_ptr_minor_ignored,
+ size_t src_len,
+ uint32_t h1v2_bias_ignored,
+ bool first_column_ignored,
+ bool last_column_ignored) {
+ uint8_t* dp = dst_ptr;
+ const uint8_t* sp = src_ptr_major;
+ while (src_len--) {  // One iteration per source byte.
+ uint8_t sv = *sp++;
+ *dp++ = sv;
+ *dp++ = sv;
+ *dp++ = sv;
+ *dp++ = sv;
+ }
+ return dst_ptr;  // The start of the bytes written above.
+}
+
+// wuffs_base__pixel_swizzler__swizzle_ycc__upsample_funcs is indexed by
+// (inv_h - 1) and then (inv_v - 1). Every entry is currently a box filter.
+static const wuffs_base__pixel_swizzler__swizzle_ycc__upsample_func
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_funcs[4][4] = {
+ {  // inv_h == 1.
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h1vn_box,
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h1vn_box,
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h1vn_box,
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h1vn_box,
+ },
+ {  // inv_h == 2.
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h2vn_box,
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h2vn_box,
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h2vn_box,
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h2vn_box,
+ },
+ {  // inv_h == 3.
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h3vn_box,
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h3vn_box,
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h3vn_box,
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h3vn_box,
+ },
+ {  // inv_h == 4.
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h4vn_box,
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h4vn_box,
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h4vn_box,
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h4vn_box,
+ },
+};
+
+static inline uint32_t // Predicate: the result is only ever 1 or 0.
+wuffs_base__pixel_swizzler__has_triangle_upsampler(uint32_t inv_h,
+ uint32_t inv_v) {
+ if (inv_h == 1u) {
+ return inv_v == 2u;  // True for h1v2 only.
+ } else if (inv_h == 2u) {
+ return (inv_v == 1u) || (inv_v == 2u);  // True for h2v1 and h2v2.
+ }
+ return false;  // NOTE(review): bool may be a better return type.
+}
+
+// --------
+
+// All of the wuffs_base__pixel_swizzler__swizzle_ycc__etc functions have
+// preconditions. See all of the checks made in
+// wuffs_base__pixel_swizzler__swizzle_ycck before calling these functions. For
// example, (width > 0) is a precondition, but there are many more.
+
static void //
-wuffs_base__pixel_swizzler__swizzle_ycc__general__box_filter(
- const wuffs_base__pixel_swizzler* p,
+wuffs_base__pixel_swizzler__swizzle_ycc__general__triangle_filter_single_row(
wuffs_base__pixel_buffer* dst,
- wuffs_base__slice_u8 dst_palette,
+ uint32_t width,  // Number of destination pixels in the row.
+ uint32_t y,  // The single destination row that this call fills in.
+ const uint8_t* src_ptr0,
+ const uint8_t* src_ptr1,
+ const uint8_t* src_ptr2,
+ uint32_t stride0,
+ uint32_t stride1,
+ uint32_t stride2,
+ uint32_t inv_h0,
+ uint32_t inv_h1,
+ uint32_t inv_h2,
+ uint32_t inv_v0,
+ uint32_t inv_v1,
+ uint32_t inv_v2,
+ uint32_t half_width_for_2to1,  // Threshold for the last_column argument.
+ uint32_t h1v2_bias,  // Forwarded, unmodified, to each upfunc.
+ uint8_t* scratch_buffer_2k_ptr,  // Used at offsets 0, 672 and 1344.
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_func upfunc0,
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_func upfunc1,
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_func upfunc2) {
+ const uint8_t* src0 = src_ptr0 + ((y / inv_v0) * (size_t)stride0);
+ const uint8_t* src1 = src_ptr1 + ((y / inv_v1) * (size_t)stride1);
+ const uint8_t* src2 = src_ptr2 + ((y / inv_v2) * (size_t)stride2);
+ uint32_t total_src_len0 = 0u;  // Running sum of src_len0 over all chunks.
+ uint32_t total_src_len1 = 0u;
+ uint32_t total_src_len2 = 0u;
+
+ uint32_t x = 0u;
+ while (x < width) {  // Each iteration handles up to 672 pixels.
+ bool first_column = x == 0u;  // True only for the leftmost chunk.
+ uint32_t end = x + 672u;
+ if (end > width) {
+ end = width;  // The final chunk can be shorter.
+ }
+
+ uint32_t src_len0 = ((end - x) + inv_h0 - 1u) / inv_h0;  // Rounds up.
+ uint32_t src_len1 = ((end - x) + inv_h1 - 1u) / inv_h1;
+ uint32_t src_len2 = ((end - x) + inv_h2 - 1u) / inv_h2;
+ total_src_len0 += src_len0;
+ total_src_len1 += src_len1;
+ total_src_len2 += src_len2;
+
+ const uint8_t* src_ptr_x0 = src0 + (x / inv_h0);
+ const uint8_t* up0 = (*upfunc0)( //
+ scratch_buffer_2k_ptr + (0u * 672u), // dst_ptr.
+ src_ptr_x0, // src_ptr_major.
+ src_ptr_x0, // src_ptr_minor (the same pointer as major).
+ src_len0, //
+ h1v2_bias, //
+ first_column, //
+ (total_src_len0 >= half_width_for_2to1));  // last_column.
+
+ const uint8_t* src_ptr_x1 = src1 + (x / inv_h1);
+ const uint8_t* up1 = (*upfunc1)( //
+ scratch_buffer_2k_ptr + (1u * 672u), // dst_ptr.
+ src_ptr_x1, // src_ptr_major.
+ src_ptr_x1, // src_ptr_minor (the same pointer as major).
+ src_len1, //
+ h1v2_bias, //
+ first_column, //
+ (total_src_len1 >= half_width_for_2to1));  // last_column.
+
+ const uint8_t* src_ptr_x2 = src2 + (x / inv_h2);
+ const uint8_t* up2 = (*upfunc2)( //
+ scratch_buffer_2k_ptr + (2u * 672u), // dst_ptr.
+ src_ptr_x2, // src_ptr_major.
+ src_ptr_x2, // src_ptr_minor (the same pointer as major).
+ src_len2, //
+ h1v2_bias, //
+ first_column, //
+ (total_src_len2 >= half_width_for_2to1));  // last_column.
+
+ for (; x < end; x++) {  // Also advances x to the start of the next chunk.
+ wuffs_base__pixel_buffer__set_color_u32_at(
+ dst, x, y,
+ wuffs_base__color_ycc__as__color_u32(*up0++, *up1++, *up2++));
+ }
+ }
+}
+
+static void //
+wuffs_base__pixel_swizzler__swizzle_ycc__general__triangle_filter(
+ wuffs_base__pixel_buffer* dst,
uint32_t width,
uint32_t height,
const uint8_t* src_ptr0,
@@ -22696,74 +22925,129 @@
uint32_t stride0,
uint32_t stride1,
uint32_t stride2,
- uint32_t h0_out_of_12,
- uint32_t h1_out_of_12,
- uint32_t h2_out_of_12,
- uint32_t v0_out_of_12,
- uint32_t v1_out_of_12,
- uint32_t v2_out_of_12) {
- uint32_t iy0 = 0;
- uint32_t iy1 = 0;
- uint32_t iy2 = 0;
- uint32_t y = 0;
+ uint32_t inv_h0,
+ uint32_t inv_h1,
+ uint32_t inv_h2,
+ uint32_t inv_v0,
+ uint32_t inv_v1,
+ uint32_t inv_v2,
+ uint32_t half_width_for_2to1,
+ uint32_t half_height_for_2to1,
+ uint8_t* scratch_buffer_2k_ptr) {
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_func upfunc0 =
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_funcs
+ [(inv_h0 - 1u) & 3u][(inv_v0 - 1u) & 3u];
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_func upfunc1 =
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_funcs
+ [(inv_h1 - 1u) & 3u][(inv_v1 - 1u) & 3u];
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_func upfunc2 =
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_funcs
+ [(inv_h2 - 1u) & 3u][(inv_v2 - 1u) & 3u];
+
+ uint32_t y;
+ for (y = 0u; y < height; y++) {
+ wuffs_base__pixel_swizzler__swizzle_ycc__general__triangle_filter_single_row(
+ dst, width, y, //
+ src_ptr0, src_ptr1, src_ptr2, //
+ stride0, stride1, stride2, //
+ inv_h0, inv_h1, inv_h2, //
+ inv_v0, inv_v1, inv_v2, //
+ half_width_for_2to1, //
+ 0u, //
+ scratch_buffer_2k_ptr, //
+ upfunc0, upfunc1, upfunc2);
+ }
+}
+
+static void // Writes width x height pixels to dst, one per (x, y), each converted from a (src0, src1, src2) byte triple.
+wuffs_base__pixel_swizzler__swizzle_ycc__general__box_filter(
+ wuffs_base__pixel_buffer* dst, // Destination; written via set_color_u32_at.
+ uint32_t width, // Must be >= 1: the column loop writes a pixel before testing for the end.
+ uint32_t height, // Must be >= 1: the row loop writes a row before testing for the end.
+ const uint8_t* src_ptr0, // First source byte of component 0 (src_ptr1, src_ptr2 likewise).
+ const uint8_t* src_ptr1,
+ const uint8_t* src_ptr2,
+ uint32_t stride0, // Added to src_ptr0 each time component 0 moves to its next source row.
+ uint32_t stride1,
+ uint32_t stride2,
+ uint32_t inv_h0, // Used as a divisor below, so every inv_* value must be nonzero.
+ uint32_t inv_h1,
+ uint32_t inv_h2,
+ uint32_t inv_v0,
+ uint32_t inv_v1,
+ uint32_t inv_v2) {
+ // Convert an inv_h or inv_v value from {1, 2, 3, 4} to {12, 6, 4, 3}.
+ uint32_t h0_out_of_12 = 12u / inv_h0; // Step added per output column; the source byte advances when the sum reaches 12.
+ uint32_t h1_out_of_12 = 12u / inv_h1;
+ uint32_t h2_out_of_12 = 12u / inv_h2;
+ uint32_t v0_out_of_12 = 12u / inv_v0; // Step added per output row; the source row advances when the sum reaches 12.
+ uint32_t v1_out_of_12 = 12u / inv_v1;
+ uint32_t v2_out_of_12 = 12u / inv_v2;
+
+ uint32_t iy0 = 0u; // Row counters: on reaching 12 they reset and the matching src_ptr moves down one stride.
+ uint32_t iy1 = 0u;
+ uint32_t iy2 = 0u;
+ uint32_t y = 0u;
while (true) {
const uint8_t* src_iter0 = src_ptr0;
const uint8_t* src_iter1 = src_ptr1;
const uint8_t* src_iter2 = src_ptr2;
- uint32_t ix0 = 0;
- uint32_t ix1 = 0;
- uint32_t ix2 = 0;
- uint32_t x = 0;
+ uint32_t ix0 = 0u; // Column counters, restarted for every output row.
+ uint32_t ix1 = 0u;
+ uint32_t ix2 = 0u;
+ uint32_t x = 0u;
while (true) {
wuffs_base__pixel_buffer__set_color_u32_at(
dst, x, y,
wuffs_base__color_ycc__as__color_u32(*src_iter0, *src_iter1,
*src_iter2));
- if ((x + 1) == width) {
+ if ((x + 1u) == width) { // The last column has just been written.
break;
}
- x = x + 1;
+ x = x + 1u;
ix0 += h0_out_of_12;
- if (ix0 >= 12) {
- ix0 = 0;
+ if (ix0 >= 12u) {
+ ix0 = 0u;
src_iter0++;
}
ix1 += h1_out_of_12;
- if (ix1 >= 12) {
- ix1 = 0;
+ if (ix1 >= 12u) {
+ ix1 = 0u;
src_iter1++;
}
ix2 += h2_out_of_12;
- if (ix2 >= 12) {
- ix2 = 0;
+ if (ix2 >= 12u) {
+ ix2 = 0u;
src_iter2++;
}
}
- if ((y + 1) == height) {
+ if ((y + 1u) == height) { // The last row has just been written.
break;
}
- y = y + 1;
+ y = y + 1u;
iy0 += v0_out_of_12;
- if (iy0 >= 12) {
- iy0 = 0;
+ if (iy0 >= 12u) {
+ iy0 = 0u;
src_ptr0 += stride0;
}
iy1 += v1_out_of_12;
- if (iy1 >= 12) {
- iy1 = 0;
+ if (iy1 >= 12u) {
+ iy1 = 0u;
src_ptr1 += stride1;
}
iy2 += v2_out_of_12;
- if (iy2 >= 12) {
- iy2 = 0;
+ if (iy2 >= 12u) {
+ iy2 = 0u;
src_ptr2 += stride2;
}
}
}
+// --------
+
// wuffs_base__pixel_swizzler__flattened_length is like
// wuffs_base__table__flattened_length but returns uint64_t (not size_t) and
// also accounts for subsampling.
@@ -22773,66 +23057,78 @@
uint32_t stride,
uint32_t inv_h,
uint32_t inv_v) {
- uint64_t scaled_width = (((uint64_t)width) + (inv_h - 1)) / inv_h;
- uint64_t scaled_height = (((uint64_t)height) + (inv_v - 1)) / inv_v;
- if (scaled_height <= 0) {
- return 0;
+ uint64_t scaled_width = (((uint64_t)width) + (inv_h - 1u)) / inv_h;
+ uint64_t scaled_height = (((uint64_t)height) + (inv_v - 1u)) / inv_v;
+ if (scaled_height <= 0u) {
+ return 0u;
}
- return ((scaled_height - 1) * stride) + scaled_width;
+ return ((scaled_height - 1u) * stride) + scaled_width;
}
WUFFS_BASE__MAYBE_STATIC wuffs_base__status //
-wuffs_base__pixel_swizzler__swizzle_ycck(const wuffs_base__pixel_swizzler* p,
- wuffs_base__pixel_buffer* dst,
- wuffs_base__slice_u8 dst_palette,
- uint32_t width,
- uint32_t height,
- wuffs_base__slice_u8 src0,
- wuffs_base__slice_u8 src1,
- wuffs_base__slice_u8 src2,
- wuffs_base__slice_u8 src3,
- uint32_t width0,
- uint32_t width1,
- uint32_t width2,
- uint32_t width3,
- uint32_t height0,
- uint32_t height1,
- uint32_t height2,
- uint32_t height3,
- uint32_t stride0,
- uint32_t stride1,
- uint32_t stride2,
- uint32_t stride3,
- uint8_t h0,
- uint8_t h1,
- uint8_t h2,
- uint8_t h3,
- uint8_t v0,
- uint8_t v1,
- uint8_t v2,
- uint8_t v3,
- bool triangle_filter_for_2to1) {
+wuffs_base__pixel_swizzler__swizzle_ycck(
+ const wuffs_base__pixel_swizzler* p,
+ wuffs_base__pixel_buffer* dst,
+ wuffs_base__slice_u8 dst_palette,
+ uint32_t width,
+ uint32_t height,
+ wuffs_base__slice_u8 src0,
+ wuffs_base__slice_u8 src1,
+ wuffs_base__slice_u8 src2,
+ wuffs_base__slice_u8 src3,
+ uint32_t width0,
+ uint32_t width1,
+ uint32_t width2,
+ uint32_t width3,
+ uint32_t height0,
+ uint32_t height1,
+ uint32_t height2,
+ uint32_t height3,
+ uint32_t stride0,
+ uint32_t stride1,
+ uint32_t stride2,
+ uint32_t stride3,
+ uint8_t h0,
+ uint8_t h1,
+ uint8_t h2,
+ uint8_t h3,
+ uint8_t v0,
+ uint8_t v1,
+ uint8_t v2,
+ uint8_t v3,
+ bool triangle_filter_for_2to1,
+ wuffs_base__slice_u8 scratch_buffer_2k) {
if (!p) {
return wuffs_base__make_status(wuffs_base__error__bad_receiver);
- } else if ((h3 != 0) || (v3 != 0) || triangle_filter_for_2to1) {
- // TODO: support the K in YCCK and support triangle_filter_for_2to1.
+ } else if ((h3 != 0u) || (v3 != 0u)) {
+ // TODO: support the K in YCCK.
return wuffs_base__make_status(
wuffs_base__error__unsupported_pixel_swizzler_option);
- } else if (!dst || (width > 0xFFFF) || (height > 0xFFFF) || //
- (4 <= (h0 - 1)) || (4 <= (v0 - 1)) || //
- (4 <= (h1 - 1)) || (4 <= (v1 - 1)) || //
- (4 <= (h2 - 1)) || (4 <= (v2 - 1))) {
+ } else if (!dst || (width > 0xFFFFu) || (height > 0xFFFFu) || //
+ (4u <= (h0 - 1u)) || (4u <= (v0 - 1u)) || //
+ (4u <= (h1 - 1u)) || (4u <= (v1 - 1u)) || //
+ (4u <= (h2 - 1u)) || (4u <= (v2 - 1u)) || //
+ (scratch_buffer_2k.len < 2048u)) {
return wuffs_base__make_status(wuffs_base__error__bad_argument);
}
uint32_t max_incl_h = wuffs_base__u32__max_of_4(h0, h1, h2, h3);
uint32_t max_incl_v = wuffs_base__u32__max_of_4(v0, v1, v2, v3);
+
+ // Calculate the inverse h and v ratios.
+ //
+ // This also canonicalizes (h=2 and max_incl_h=4) as equivalent to (h=1 and
+ // max_incl_h=2). In both cases, the inv_h value is 2.
uint32_t inv_h0 = max_incl_h / h0;
uint32_t inv_h1 = max_incl_h / h1;
uint32_t inv_h2 = max_incl_h / h2;
uint32_t inv_v0 = max_incl_v / v0;
uint32_t inv_v1 = max_incl_v / v1;
uint32_t inv_v2 = max_incl_v / v2;
+
+ uint32_t half_width_for_2to1 = (width + 1u) / 2u;
+ uint32_t half_height_for_2to1 = (height + 1u) / 2u;
+
width = wuffs_base__u32__min_of_5( //
width, //
width0 * inv_h0, //
@@ -22892,20 +23188,32 @@
wuffs_base__error__unsupported_pixel_swizzler_option);
}
- if ((width <= 0) || (height <= 0)) {
+ if ((width <= 0u) || (height <= 0u)) {
return wuffs_base__make_status(NULL);
}
- wuffs_base__pixel_swizzler__swizzle_ycc__general__box_filter(
- p, dst, dst_palette, width, height, //
- src0.ptr, src1.ptr, src2.ptr, //
- stride0, stride1, stride2, //
- (h0 * 12) / max_incl_h, //
- (h1 * 12) / max_incl_h, //
- (h2 * 12) / max_incl_h, //
- (v0 * 12) / max_incl_v, //
- (v1 * 12) / max_incl_v, //
- (v2 * 12) / max_incl_v);
+ if (triangle_filter_for_2to1 &&
+ (wuffs_base__pixel_swizzler__has_triangle_upsampler(inv_h0, inv_v0) ||
+ wuffs_base__pixel_swizzler__has_triangle_upsampler(inv_h1, inv_v1) ||
+ wuffs_base__pixel_swizzler__has_triangle_upsampler(inv_h2, inv_v2))) {
+ wuffs_base__pixel_swizzler__swizzle_ycc__general__triangle_filter(
+ dst, width, height, //
+ src0.ptr, src1.ptr, src2.ptr, //
+ stride0, stride1, stride2, //
+ inv_h0, inv_h1, inv_h2, //
+ inv_v0, inv_v1, inv_v2, //
+ half_width_for_2to1, half_height_for_2to1, //
+ scratch_buffer_2k.ptr);
+
+ } else {
+ wuffs_base__pixel_swizzler__swizzle_ycc__general__box_filter(
+ dst, width, height, //
+ src0.ptr, src1.ptr, src2.ptr, //
+ stride0, stride1, stride2, //
+ inv_h0, inv_h1, inv_h2, //
+ inv_v0, inv_v1, inv_v2);
+ }
+
return wuffs_base__make_status(NULL);
}
@@ -37757,6 +38065,7 @@
bool v_has_h3 = false;
bool v_has_v24 = false;
bool v_has_v3 = false;
+ uint32_t v_upper_bound = 0;
const uint8_t* iop_a_src = NULL;
const uint8_t* io0_a_src WUFFS_BASE__POTENTIALLY_UNUSED = NULL;
@@ -37988,28 +38297,15 @@
} else {
self->private_impl.f_height_in_mcus = ((self->private_impl.f_height + 31) / 32);
}
- v_i = 0;
- while (v_i < self->private_impl.f_num_components) {
- if (self->private_impl.f_components_h[v_i] == 1) {
- self->private_impl.f_components_workbuf_widths[v_i] = (((self->private_impl.f_width + 7) / 8) * 8);
- } else if (self->private_impl.f_components_h[v_i] == 2) {
- self->private_impl.f_components_workbuf_widths[v_i] = (((self->private_impl.f_width + 15) / 16) * 16);
- } else if (self->private_impl.f_components_h[v_i] == 3) {
- self->private_impl.f_components_workbuf_widths[v_i] = (((self->private_impl.f_width + 23) / 24) * 24);
- } else {
- self->private_impl.f_components_workbuf_widths[v_i] = (((self->private_impl.f_width + 31) / 32) * 32);
- }
- if (self->private_impl.f_components_v[v_i] == 1) {
- self->private_impl.f_components_workbuf_heights[v_i] = (((self->private_impl.f_height + 7) / 8) * 8);
- } else if (self->private_impl.f_components_v[v_i] == 2) {
- self->private_impl.f_components_workbuf_heights[v_i] = (((self->private_impl.f_height + 15) / 16) * 16);
- } else if (self->private_impl.f_components_v[v_i] == 3) {
- self->private_impl.f_components_workbuf_heights[v_i] = (((self->private_impl.f_height + 23) / 24) * 24);
- } else {
- self->private_impl.f_components_workbuf_heights[v_i] = (((self->private_impl.f_height + 31) / 32) * 32);
- }
- v_i += 1;
- }
+ v_upper_bound = 65544;
+ self->private_impl.f_components_workbuf_widths[0] = wuffs_base__u32__min(v_upper_bound, (8 * self->private_impl.f_width_in_mcus * ((uint32_t)(self->private_impl.f_components_h[0]))));
+ self->private_impl.f_components_workbuf_widths[1] = wuffs_base__u32__min(v_upper_bound, (8 * self->private_impl.f_width_in_mcus * ((uint32_t)(self->private_impl.f_components_h[1]))));
+ self->private_impl.f_components_workbuf_widths[2] = wuffs_base__u32__min(v_upper_bound, (8 * self->private_impl.f_width_in_mcus * ((uint32_t)(self->private_impl.f_components_h[2]))));
+ self->private_impl.f_components_workbuf_widths[3] = wuffs_base__u32__min(v_upper_bound, (8 * self->private_impl.f_width_in_mcus * ((uint32_t)(self->private_impl.f_components_h[3]))));
+ self->private_impl.f_components_workbuf_heights[0] = wuffs_base__u32__min(v_upper_bound, (8 * self->private_impl.f_height_in_mcus * ((uint32_t)(self->private_impl.f_components_v[0]))));
+ self->private_impl.f_components_workbuf_heights[1] = wuffs_base__u32__min(v_upper_bound, (8 * self->private_impl.f_height_in_mcus * ((uint32_t)(self->private_impl.f_components_v[1]))));
+ self->private_impl.f_components_workbuf_heights[2] = wuffs_base__u32__min(v_upper_bound, (8 * self->private_impl.f_height_in_mcus * ((uint32_t)(self->private_impl.f_components_v[2]))));
+ self->private_impl.f_components_workbuf_heights[3] = wuffs_base__u32__min(v_upper_bound, (8 * self->private_impl.f_height_in_mcus * ((uint32_t)(self->private_impl.f_components_v[3]))));
self->private_impl.f_components_workbuf_offsets[0] = 0;
self->private_impl.f_components_workbuf_offsets[1] = (self->private_impl.f_components_workbuf_offsets[0] + (((uint64_t)(self->private_impl.f_components_workbuf_widths[0])) * ((uint64_t)(self->private_impl.f_components_workbuf_heights[0]))));
self->private_impl.f_components_workbuf_offsets[2] = (self->private_impl.f_components_workbuf_offsets[1] + (((uint64_t)(self->private_impl.f_components_workbuf_widths[1])) * ((uint64_t)(self->private_impl.f_components_workbuf_heights[1]))));
@@ -39419,7 +39715,8 @@
self->private_impl.f_components_v[1],
self->private_impl.f_components_v[2],
self->private_impl.f_components_v[3],
- false);
+ true,
+ wuffs_base__make_slice_u8(self->private_data.f_bitstream_buffer, 2048));
return wuffs_base__status__ensure_not_a_suspension(v_status);
}
diff --git a/std/jpeg/decode_jpeg.wuffs b/std/jpeg/decode_jpeg.wuffs
index 65a4461..c3bcaae 100644
--- a/std/jpeg/decode_jpeg.wuffs
+++ b/std/jpeg/decode_jpeg.wuffs
@@ -366,16 +366,19 @@
}
pri func decoder.decode_sof?(src: base.io_reader) {
- var c : base.u8
- var comp_h : base.u8
- var comp_v : base.u8
- var i : base.u32
- var j : base.u32
+ var c : base.u8
+ var comp_h : base.u8
+ var comp_v : base.u8
+ var i : base.u32
+ var j : base.u32
+
var has_h24 : base.bool
var has_h3 : base.bool
var has_v24 : base.bool
var has_v3 : base.bool
+ var upper_bound : base.u32[..= 0x1_0008]
+
if this.payload_length < 6 {
return "#bad SOF marker"
}
@@ -498,32 +501,25 @@
this.height_in_mcus = (this.height + 0x1F) / 0x20
}
- i = 0
- while i < this.num_components {
- assert i < 4 via "a < b: a < c; c <= b"(c: this.num_components)
+ upper_bound = 0x1_0008
- if this.components_h[i] == 1 {
- this.components_workbuf_widths[i] = ((this.width + 0x07) / 0x08) * 0x08
- } else if this.components_h[i] == 2 {
- this.components_workbuf_widths[i] = ((this.width + 0x0F) / 0x10) * 0x10
- } else if this.components_h[i] == 3 {
- this.components_workbuf_widths[i] = ((this.width + 0x17) / 0x18) * 0x18
- } else {
- this.components_workbuf_widths[i] = ((this.width + 0x1F) / 0x20) * 0x20
- }
+ this.components_workbuf_widths[0] = upper_bound.min(no_more_than:
+ 8 * this.width_in_mcus * (this.components_h[0] as base.u32))
+ this.components_workbuf_widths[1] = upper_bound.min(no_more_than:
+ 8 * this.width_in_mcus * (this.components_h[1] as base.u32))
+ this.components_workbuf_widths[2] = upper_bound.min(no_more_than:
+ 8 * this.width_in_mcus * (this.components_h[2] as base.u32))
+ this.components_workbuf_widths[3] = upper_bound.min(no_more_than:
+ 8 * this.width_in_mcus * (this.components_h[3] as base.u32))
- if this.components_v[i] == 1 {
- this.components_workbuf_heights[i] = ((this.height + 0x07) / 0x08) * 0x08
- } else if this.components_v[i] == 2 {
- this.components_workbuf_heights[i] = ((this.height + 0x0F) / 0x10) * 0x10
- } else if this.components_v[i] == 3 {
- this.components_workbuf_heights[i] = ((this.height + 0x17) / 0x18) * 0x18
- } else {
- this.components_workbuf_heights[i] = ((this.height + 0x1F) / 0x20) * 0x20
- }
-
- i += 1
- } endwhile
+ this.components_workbuf_heights[0] = upper_bound.min(no_more_than:
+ 8 * this.height_in_mcus * (this.components_v[0] as base.u32))
+ this.components_workbuf_heights[1] = upper_bound.min(no_more_than:
+ 8 * this.height_in_mcus * (this.components_v[1] as base.u32))
+ this.components_workbuf_heights[2] = upper_bound.min(no_more_than:
+ 8 * this.height_in_mcus * (this.components_v[2] as base.u32))
+ this.components_workbuf_heights[3] = upper_bound.min(no_more_than:
+ 8 * this.height_in_mcus * (this.components_v[3] as base.u32))
this.components_workbuf_offsets[0] = 0
this.components_workbuf_offsets[1] = this.components_workbuf_offsets[0] +
@@ -1437,7 +1433,8 @@
v1: this.components_v[1],
v2: this.components_v[2],
v3: this.components_v[3],
- triangle_filter_for_2to1: false)
+ triangle_filter_for_2to1: true,
+ scratch_buffer_2k: this.bitstream_buffer[..])
return status
}
diff --git a/test/c/std/wbmp.c b/test/c/std/wbmp.c
index 640d60d..7090e03 100644
--- a/test/c/std/wbmp.c
+++ b/test/c/std/wbmp.c
@@ -512,6 +512,36 @@
return NULL;
}
+const char* // Test: feeds 16 bytes to the h2vn box upsampler and compares the 32 result bytes with want_array.
+test_wuffs_upsample_inv_h2v1() {
+ CHECK_FOCUS(__func__);
+
+ // src_array0 is the 16 ASCII bytes of "A lovely example".
+ const uint8_t src_array0[16] = {
+ 0x41, 0x20, 0x6C, 0x6F, 0x76, 0x65, 0x6C, 0x79, //
+ 0x20, 0x65, 0x78, 0x61, 0x6D, 0x70, 0x6C, 0x65, //
+ };
+
+ const uint8_t want_array[32] = { // Every src_array0 byte, in order, written twice.
+ 0x41, 0x41, 0x20, 0x20, 0x6C, 0x6C, 0x6F, 0x6F, //
+ 0x76, 0x76, 0x65, 0x65, 0x6C, 0x6C, 0x79, 0x79, //
+ 0x20, 0x20, 0x65, 0x65, 0x78, 0x78, 0x61, 0x61, //
+ 0x6D, 0x6D, 0x70, 0x70, 0x6C, 0x6C, 0x65, 0x65, //
+ };
+
+ const uint8_t* have_ptr = // Pointer returned by the upsampler; 32 bytes are read from it below.
+ wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h2vn_box( // NOTE(review): the meaning of 16, 0, true, true is not visible here; confirm against the callee.
+ g_have_array_u8, src_array0, src_array0, 16, 0, true, true);
+
+ const bool closed = true; // Passed to both readers constructed below.
+ wuffs_base__io_buffer have = wuffs_base__ptr_u8__reader( //
+ (void*)have_ptr, 32, closed);
+ wuffs_base__io_buffer want = wuffs_base__ptr_u8__reader( //
+ (void*)want_array, 32, closed);
+
+ return check_io_buffers_equal("", &have, &want);
+}
+
// ---------------- WBMP Tests
const char* //
@@ -769,6 +799,7 @@
test_wuffs_color_ycc_as_color_u32,
test_wuffs_pixel_buffer_fill_rect,
test_wuffs_pixel_swizzler_swizzle,
+ test_wuffs_upsample_inv_h2v1,
test_wuffs_wbmp_decode_frame_config,
test_wuffs_wbmp_decode_image_config,