blob: 004b1e20db7d6fe30c2516bee9511e4e48544699 [file] [log] [blame]
// Copyright 2023 The Wuffs Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// --------
// wuffs_base__u32__max_of_4 returns the largest of its four arguments.
static inline uint32_t  //
wuffs_base__u32__max_of_4(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
  const uint32_t max_ab = wuffs_base__u32__max(a, b);
  const uint32_t max_cd = wuffs_base__u32__max(c, d);
  return wuffs_base__u32__max(max_ab, max_cd);
}
// wuffs_base__u32__min_of_5 returns the smallest of its five arguments.
static inline uint32_t  //
wuffs_base__u32__min_of_5(uint32_t a,
                          uint32_t b,
                          uint32_t c,
                          uint32_t d,
                          uint32_t e) {
  const uint32_t min_ab = wuffs_base__u32__min(a, b);
  const uint32_t min_cd = wuffs_base__u32__min(c, d);
  const uint32_t min_abcd = wuffs_base__u32__min(min_ab, min_cd);
  return wuffs_base__u32__min(min_abcd, e);
}
// --------
// wuffs_base__pixel_swizzler__swizzle_ycc__upsample_func upsamples to a
// destination slice at least 480 (YCCK) or 672 (YCC) bytes long and whose
// src_len (multiplied by 1, 2, 3 or 4) is positive but no more than that. This
// 480 or 672 length is just under 1/4 or 1/3 of the scratch_buffer_2k slice
// length. Both (480 * 4) = 1920 and (672 * 3) = 2016 are less than 2048.
//
// 480 and 672 are nice round numbers because a JPEG MCU is 1, 2, 3 or 4 blocks
// wide and each block is 8 pixels wide. We have:
// 480 = 1 * 8 * 60, 672 = 1 * 8 * 84
// 480 = 2 * 8 * 30, 672 = 2 * 8 * 42
// 480 = 3 * 8 * 20, 672 = 3 * 8 * 28
// 480 = 4 * 8 * 15, 672 = 4 * 8 * 21
//
// Box filters are equivalent to nearest neighbor upsampling. These ignore the
// src_ptr_minor, h1v2_bias, first_column and last_column arguments.
//
// TODO: triangle filters.
//
// Arguments, as used by the implementations and the caller in this file:
//  - dst_ptr is where upsampled samples may be written.
//  - src_ptr_major and src_ptr_minor point at the source samples. The box
//    filters only read src_ptr_major.
//  - src_len is the number of source samples to consume.
//  - h1v2_bias is passed through by the caller. The box filters ignore it.
//  - first_column is true when the caller's chunk starts at x == 0.
//  - last_column is true once the caller's running total of src_len values
//    has reached its half_width_for_2to1 value.
//
// The returned pointer is where the caller reads the upsampled samples from.
// It is either dst_ptr or, when no copying was needed, src_ptr_major itself.
typedef const uint8_t* (
*wuffs_base__pixel_swizzler__swizzle_ycc__upsample_func)(
uint8_t* dst_ptr,
const uint8_t* src_ptr_major, // Nearest row.
const uint8_t* src_ptr_minor, // Adjacent row, alternating above or below.
size_t src_len,
uint32_t h1v2_bias,
bool first_column,
bool last_column);
// wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h1vn_box is the box
// filter for a horizontal ratio of 1. There is nothing to expand, so it writes
// nothing to dst_ptr and hands back the source row unchanged.
static const uint8_t*  //
wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h1vn_box(
    uint8_t* dst_ptr,
    const uint8_t* src_ptr_major,
    const uint8_t* src_ptr_minor_ignored,
    size_t src_len,
    uint32_t h1v2_bias_ignored,
    bool first_column_ignored,
    bool last_column_ignored) {
  // Every argument other than src_ptr_major is deliberately unused.
  (void)dst_ptr;
  (void)src_ptr_minor_ignored;
  (void)src_len;
  (void)h1v2_bias_ignored;
  (void)first_column_ignored;
  (void)last_column_ignored;

  const uint8_t* result = src_ptr_major;
  return result;
}
// wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h2vn_box is the box
// filter for a horizontal ratio of 2. Each of the src_len source samples is
// written twice, so (2 * src_len) bytes of dst_ptr are filled. It returns
// dst_ptr.
static const uint8_t*  //
wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h2vn_box(
    uint8_t* dst_ptr,
    const uint8_t* src_ptr_major,
    const uint8_t* src_ptr_minor_ignored,
    size_t src_len,
    uint32_t h1v2_bias_ignored,
    bool first_column_ignored,
    bool last_column_ignored) {
  uint8_t* out = dst_ptr;
  size_t i;
  for (i = 0u; i < src_len; i++) {
    const uint8_t sample = src_ptr_major[i];
    out[0] = sample;
    out[1] = sample;
    out += 2;
  }
  return dst_ptr;
}
// wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h3vn_box is the box
// filter for a horizontal ratio of 3. Each of the src_len source samples is
// written three times, so (3 * src_len) bytes of dst_ptr are filled. It
// returns dst_ptr.
static const uint8_t*  //
wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h3vn_box(
    uint8_t* dst_ptr,
    const uint8_t* src_ptr_major,
    const uint8_t* src_ptr_minor_ignored,
    size_t src_len,
    uint32_t h1v2_bias_ignored,
    bool first_column_ignored,
    bool last_column_ignored) {
  uint8_t* out = dst_ptr;
  size_t i;
  for (i = 0u; i < src_len; i++) {
    const uint8_t sample = src_ptr_major[i];
    out[0] = sample;
    out[1] = sample;
    out[2] = sample;
    out += 3;
  }
  return dst_ptr;
}
// wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h4vn_box is the box
// filter for a horizontal ratio of 4. Each of the src_len source samples is
// written four times, so (4 * src_len) bytes of dst_ptr are filled. It returns
// dst_ptr.
static const uint8_t*  //
wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h4vn_box(
    uint8_t* dst_ptr,
    const uint8_t* src_ptr_major,
    const uint8_t* src_ptr_minor_ignored,
    size_t src_len,
    uint32_t h1v2_bias_ignored,
    bool first_column_ignored,
    bool last_column_ignored) {
  uint8_t* out = dst_ptr;
  size_t i;
  for (i = 0u; i < src_len; i++) {
    const uint8_t sample = src_ptr_major[i];
    out[0] = sample;
    out[1] = sample;
    out[2] = sample;
    out[3] = sample;
    out += 4;
  }
  return dst_ptr;
}
// wuffs_base__pixel_swizzler__swizzle_ycc__upsample_funcs is indexed by
// (inv_h - 1) and then by (inv_v - 1), so that inv_h and inv_v values of 1, 2,
// 3 or 4 map to array indexes 0, 1, 2 or 3.
//
// Every entry is currently a box filter. Box filters depend only on inv_h, so
// each row repeats the same function for all four inv_v values.
static const wuffs_base__pixel_swizzler__swizzle_ycc__upsample_func
wuffs_base__pixel_swizzler__swizzle_ycc__upsample_funcs[4][4] = {
// inv_h == 1.
{
wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h1vn_box,
wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h1vn_box,
wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h1vn_box,
wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h1vn_box,
},
// inv_h == 2.
{
wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h2vn_box,
wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h2vn_box,
wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h2vn_box,
wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h2vn_box,
},
// inv_h == 3.
{
wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h3vn_box,
wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h3vn_box,
wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h3vn_box,
wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h3vn_box,
},
// inv_h == 4.
{
wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h4vn_box,
wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h4vn_box,
wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h4vn_box,
wuffs_base__pixel_swizzler__swizzle_ycc__upsample_inv_h4vn_box,
},
};
// wuffs_base__pixel_swizzler__has_triangle_upsampler returns 1 when the
// (inv_h, inv_v) pair is one of (1, 2), (2, 1) or (2, 2) and returns 0 for
// every other pair.
static inline uint32_t  //
wuffs_base__pixel_swizzler__has_triangle_upsampler(uint32_t inv_h,
                                                   uint32_t inv_v) {
  switch (inv_h) {
    case 1u:
      return (inv_v == 2u) ? 1u : 0u;
    case 2u:
      return ((inv_v == 1u) || (inv_v == 2u)) ? 1u : 0u;
    default:
      break;
  }
  return 0u;
}
// --------
// All of the wuffs_base__pixel_swizzler__swizzle_ycc__etc functions have
// preconditions. See all of the checks made in
// wuffs_base__pixel_swizzler__swizzle_ycck before calling these functions. For
// example, (width > 0) is a precondition, but there are many more.
// wuffs_base__pixel_swizzler__swizzle_ycc__general__triangle_filter_single_row
// converts one destination row (row y, columns 0 up to width) from three
// subsampled source planes.
//
// The row is processed in chunks of up to 672 destination pixels. For each
// chunk, each plane's upfuncN is called with its own 672-byte region of
// scratch_buffer_2k_ptr, and the three resulting sample streams are combined
// pixel by pixel into dst.
//
// The same source pointer is passed to upfuncN as both the major and the
// minor row.
static void  //
wuffs_base__pixel_swizzler__swizzle_ycc__general__triangle_filter_single_row(
    wuffs_base__pixel_buffer* dst,
    uint32_t width,
    uint32_t y,
    const uint8_t* src_ptr0,
    const uint8_t* src_ptr1,
    const uint8_t* src_ptr2,
    uint32_t stride0,
    uint32_t stride1,
    uint32_t stride2,
    uint32_t inv_h0,
    uint32_t inv_h1,
    uint32_t inv_h2,
    uint32_t inv_v0,
    uint32_t inv_v1,
    uint32_t inv_v2,
    uint32_t half_width_for_2to1,
    uint32_t h1v2_bias,
    uint8_t* scratch_buffer_2k_ptr,
    wuffs_base__pixel_swizzler__swizzle_ycc__upsample_func upfunc0,
    wuffs_base__pixel_swizzler__swizzle_ycc__upsample_func upfunc1,
    wuffs_base__pixel_swizzler__swizzle_ycc__upsample_func upfunc2) {
  // The source row, in each plane, that covers destination row y.
  const uint8_t* row0 = src_ptr0 + ((y / inv_v0) * (size_t)stride0);
  const uint8_t* row1 = src_ptr1 + ((y / inv_v1) * (size_t)stride1);
  const uint8_t* row2 = src_ptr2 + ((y / inv_v2) * (size_t)stride2);

  // Running count of source samples handed to each plane's upsampler.
  uint32_t consumed0 = 0u;
  uint32_t consumed1 = 0u;
  uint32_t consumed2 = 0u;

  uint32_t x = 0u;
  while (x < width) {
    bool is_first_chunk = (x == 0u);

    // This chunk covers destination columns x up to end.
    uint32_t end = x + 672u;
    end = (end > width) ? width : end;
    uint32_t span = end - x;

    // Round up: a partially covered source sample still has to be read.
    uint32_t n0 = (span + inv_h0 - 1u) / inv_h0;
    uint32_t n1 = (span + inv_h1 - 1u) / inv_h1;
    uint32_t n2 = (span + inv_h2 - 1u) / inv_h2;
    consumed0 += n0;
    consumed1 += n1;
    consumed2 += n2;

    const uint8_t* chunk_src0 = row0 + (x / inv_h0);
    bool is_last0 = (consumed0 >= half_width_for_2to1);
    const uint8_t* up0 = upfunc0(scratch_buffer_2k_ptr + (0u * 672u),  //
                                 chunk_src0, chunk_src0,               //
                                 n0, h1v2_bias,                        //
                                 is_first_chunk, is_last0);

    const uint8_t* chunk_src1 = row1 + (x / inv_h1);
    bool is_last1 = (consumed1 >= half_width_for_2to1);
    const uint8_t* up1 = upfunc1(scratch_buffer_2k_ptr + (1u * 672u),  //
                                 chunk_src1, chunk_src1,               //
                                 n1, h1v2_bias,                        //
                                 is_first_chunk, is_last1);

    const uint8_t* chunk_src2 = row2 + (x / inv_h2);
    bool is_last2 = (consumed2 >= half_width_for_2to1);
    const uint8_t* up2 = upfunc2(scratch_buffer_2k_ptr + (2u * 672u),  //
                                 chunk_src2, chunk_src2,               //
                                 n2, h1v2_bias,                        //
                                 is_first_chunk, is_last2);

    // Combine one sample from each plane per destination pixel.
    while (x < end) {
      uint8_t c0 = *up0++;
      uint8_t c1 = *up1++;
      uint8_t c2 = *up2++;
      wuffs_base__pixel_buffer__set_color_u32_at(
          dst, x, y, wuffs_base__color_ycc__as__color_u32(c0, c1, c2));
      x++;
    }
  }
}
// wuffs_base__pixel_swizzler__swizzle_ycc__general__triangle_filter converts
// every destination row from 0 up to height. It picks one upsample function
// per plane from the wuffs_base__pixel_swizzler__swizzle_ycc__upsample_funcs
// table and then delegates each row to the single_row function.
//
// The half_height_for_2to1 argument is not used and the h1v2_bias passed to
// each row is always 0.
static void  //
wuffs_base__pixel_swizzler__swizzle_ycc__general__triangle_filter(
    wuffs_base__pixel_buffer* dst,
    uint32_t width,
    uint32_t height,
    const uint8_t* src_ptr0,
    const uint8_t* src_ptr1,
    const uint8_t* src_ptr2,
    uint32_t stride0,
    uint32_t stride1,
    uint32_t stride2,
    uint32_t inv_h0,
    uint32_t inv_h1,
    uint32_t inv_h2,
    uint32_t inv_v0,
    uint32_t inv_v1,
    uint32_t inv_v2,
    uint32_t half_width_for_2to1,
    uint32_t half_height_for_2to1,
    uint8_t* scratch_buffer_2k_ptr) {
  (void)half_height_for_2to1;

  // The "& 3u" keeps each table index within 0 ..= 3.
  const uint32_t h_index0 = (inv_h0 - 1u) & 3u;
  const uint32_t v_index0 = (inv_v0 - 1u) & 3u;
  const uint32_t h_index1 = (inv_h1 - 1u) & 3u;
  const uint32_t v_index1 = (inv_v1 - 1u) & 3u;
  const uint32_t h_index2 = (inv_h2 - 1u) & 3u;
  const uint32_t v_index2 = (inv_v2 - 1u) & 3u;

  wuffs_base__pixel_swizzler__swizzle_ycc__upsample_func upfunc0 =
      wuffs_base__pixel_swizzler__swizzle_ycc__upsample_funcs[h_index0]
                                                             [v_index0];
  wuffs_base__pixel_swizzler__swizzle_ycc__upsample_func upfunc1 =
      wuffs_base__pixel_swizzler__swizzle_ycc__upsample_funcs[h_index1]
                                                             [v_index1];
  wuffs_base__pixel_swizzler__swizzle_ycc__upsample_func upfunc2 =
      wuffs_base__pixel_swizzler__swizzle_ycc__upsample_funcs[h_index2]
                                                             [v_index2];

  uint32_t row = 0u;
  while (row < height) {
    wuffs_base__pixel_swizzler__swizzle_ycc__general__triangle_filter_single_row(
        dst, width, row,               //
        src_ptr0, src_ptr1, src_ptr2,  //
        stride0, stride1, stride2,     //
        inv_h0, inv_h1, inv_h2,        //
        inv_v0, inv_v1, inv_v2,        //
        half_width_for_2to1,           //
        0u,                            //
        scratch_buffer_2k_ptr,         //
        upfunc0, upfunc1, upfunc2);
    row++;
  }
}
// wuffs_base__pixel_swizzler__swizzle_ycc__general__box_filter converts every
// destination pixel from three subsampled source planes, repeating each source
// sample inv_hN times horizontally and each source row inv_vN times
// vertically.
//
// Both loops write a pixel before testing for the end, so width and height
// must both be positive.
static void  //
wuffs_base__pixel_swizzler__swizzle_ycc__general__box_filter(
    wuffs_base__pixel_buffer* dst,
    uint32_t width,
    uint32_t height,
    const uint8_t* src_ptr0,
    const uint8_t* src_ptr1,
    const uint8_t* src_ptr2,
    uint32_t stride0,
    uint32_t stride1,
    uint32_t stride2,
    uint32_t inv_h0,
    uint32_t inv_h1,
    uint32_t inv_h2,
    uint32_t inv_v0,
    uint32_t inv_v1,
    uint32_t inv_v2) {
  // Each step is a fraction of 12: an inv value in {1, 2, 3, 4} becomes a
  // step in {12, 6, 4, 3}. A source position advances whenever its
  // accumulated steps reach 12.
  const uint32_t h_step0 = 12u / inv_h0;
  const uint32_t h_step1 = 12u / inv_h1;
  const uint32_t h_step2 = 12u / inv_h2;
  const uint32_t v_step0 = 12u / inv_v0;
  const uint32_t v_step1 = 12u / inv_v1;
  const uint32_t v_step2 = 12u / inv_v2;

  // The current source row in each plane.
  const uint8_t* row0 = src_ptr0;
  const uint8_t* row1 = src_ptr1;
  const uint8_t* row2 = src_ptr2;

  uint32_t v_acc0 = 0u;
  uint32_t v_acc1 = 0u;
  uint32_t v_acc2 = 0u;

  uint32_t y = 0u;
  for (;;) {
    const uint8_t* s0 = row0;
    const uint8_t* s1 = row1;
    const uint8_t* s2 = row2;
    uint32_t h_acc0 = 0u;
    uint32_t h_acc1 = 0u;
    uint32_t h_acc2 = 0u;

    uint32_t x = 0u;
    for (;;) {
      wuffs_base__pixel_buffer__set_color_u32_at(
          dst, x, y, wuffs_base__color_ycc__as__color_u32(*s0, *s1, *s2));

      x++;
      if (x == width) {
        break;
      }

      h_acc0 += h_step0;
      if (h_acc0 >= 12u) {
        h_acc0 = 0u;
        s0++;
      }
      h_acc1 += h_step1;
      if (h_acc1 >= 12u) {
        h_acc1 = 0u;
        s1++;
      }
      h_acc2 += h_step2;
      if (h_acc2 >= 12u) {
        h_acc2 = 0u;
        s2++;
      }
    }

    y++;
    if (y == height) {
      break;
    }

    v_acc0 += v_step0;
    if (v_acc0 >= 12u) {
      v_acc0 = 0u;
      row0 += stride0;
    }
    v_acc1 += v_step1;
    if (v_acc1 >= 12u) {
      v_acc1 = 0u;
      row1 += stride1;
    }
    v_acc2 += v_step2;
    if (v_acc2 >= 12u) {
      v_acc2 = 0u;
      row2 += stride2;
    }
  }
}
// --------
// wuffs_base__pixel_swizzler__flattened_length is like
// wuffs_base__table__flattened_length but returns uint64_t (not size_t) and
// also accounts for subsampling.
// It returns the minimum number of bytes needed to hold a plane that is
// ceil(width / inv_h) samples wide and ceil(height / inv_v) rows tall, where
// consecutive rows start stride bytes apart. The final row only needs its
// samples, not a full stride. The result is 0 when there are no rows.
//
// Both inv_h and inv_v must be non-zero.
static uint64_t  //
wuffs_base__pixel_swizzler__flattened_length(uint32_t width,
                                             uint32_t height,
                                             uint32_t stride,
                                             uint32_t inv_h,
                                             uint32_t inv_v) {
  const uint64_t round_up_h = inv_h - 1u;
  const uint64_t round_up_v = inv_v - 1u;
  const uint64_t cols = (((uint64_t)width) + round_up_h) / inv_h;
  const uint64_t rows = (((uint64_t)height) + round_up_v) / inv_v;
  return (rows == 0u) ? 0u : (((rows - 1u) * stride) + cols);
}
// wuffs_base__pixel_swizzler__swizzle_ycck__scaled_extent returns (extent *
// inv), saturating at 0xFFFFFFFF instead of wrapping around modulo (1 << 32).
static inline uint32_t  //
wuffs_base__pixel_swizzler__swizzle_ycck__scaled_extent(uint32_t extent,
                                                        uint32_t inv) {
  uint64_t product = ((uint64_t)extent) * ((uint64_t)inv);
  return (product > 0xFFFFFFFFu) ? 0xFFFFFFFFu : ((uint32_t)product);
}

// wuffs_base__pixel_swizzler__swizzle_ycck converts three subsampled source
// planes (src0, src1 and src2) to the dst pixel buffer, one color per pixel.
//
// Arguments:
//  - width and height are the requested output dimensions. They are clamped
//    to dst's dimensions and to each plane's (dimension * inverse ratio).
//  - srcN, widthN, heightN and strideN describe plane N.
//  - hN and vN are plane N's horizontal and vertical sampling factors. Each
//    must be in the range 1 ..= 4 and must evenly divide the largest factor.
//  - triangle_filter_for_2to1 requests the triangle filter code path for
//    planes that wuffs_base__pixel_swizzler__has_triangle_upsampler accepts.
//  - scratch_buffer_2k must be at least 2048 bytes long.
//
// The dst_palette, src3, width3, height3 and stride3 arguments are not read.
// Non-zero h3 or v3 is rejected as unsupported.
//
// It returns:
//  - wuffs_base__error__bad_receiver if p is NULL.
//  - wuffs_base__error__unsupported_pixel_swizzler_option if h3 or v3 is
//    non-zero, or if dst's pixel format is planar or not in the list below.
//  - wuffs_base__error__bad_argument if dst is NULL, an argument is out of
//    range or a source slice is too short for the clamped dimensions.
//  - an OK status otherwise, including when the clamped width or height is
//    zero (in which case nothing is written).
WUFFS_BASE__MAYBE_STATIC wuffs_base__status  //
wuffs_base__pixel_swizzler__swizzle_ycck(
    const wuffs_base__pixel_swizzler* p,
    wuffs_base__pixel_buffer* dst,
    wuffs_base__slice_u8 dst_palette,
    uint32_t width,
    uint32_t height,
    wuffs_base__slice_u8 src0,
    wuffs_base__slice_u8 src1,
    wuffs_base__slice_u8 src2,
    wuffs_base__slice_u8 src3,
    uint32_t width0,
    uint32_t width1,
    uint32_t width2,
    uint32_t width3,
    uint32_t height0,
    uint32_t height1,
    uint32_t height2,
    uint32_t height3,
    uint32_t stride0,
    uint32_t stride1,
    uint32_t stride2,
    uint32_t stride3,
    uint8_t h0,
    uint8_t h1,
    uint8_t h2,
    uint8_t h3,
    uint8_t v0,
    uint8_t v1,
    uint8_t v2,
    uint8_t v3,
    bool triangle_filter_for_2to1,
    wuffs_base__slice_u8 scratch_buffer_2k) {
  if (!p) {
    return wuffs_base__make_status(wuffs_base__error__bad_receiver);
  } else if ((h3 != 0u) || (v3 != 0u)) {
    // TODO: support the K in YCCK.
    return wuffs_base__make_status(
        wuffs_base__error__unsupported_pixel_swizzler_option);
  } else if (!dst || (width > 0xFFFFu) || (height > 0xFFFFu) ||  //
             // The "4u <= (x - 1u)" form also rejects (x == 0), as the
             // unsigned subtraction wraps around to a large value.
             (4u <= (h0 - 1u)) || (4u <= (v0 - 1u)) ||  //
             (4u <= (h1 - 1u)) || (4u <= (v1 - 1u)) ||  //
             (4u <= (h2 - 1u)) || (4u <= (v2 - 1u)) ||  //
             (scratch_buffer_2k.len < 2048u)) {
    return wuffs_base__make_status(wuffs_base__error__bad_argument);
  }

  uint32_t max_incl_h = wuffs_base__u32__max_of_4(h0, h1, h2, h3);
  uint32_t max_incl_v = wuffs_base__u32__max_of_4(v0, v1, v2, v3);

  // Calculate the inverse h and v ratios.
  //
  // It also canonicalizes (h=2 and max_incl_h=4) as equivalent to (h=1 and
  // max_incl_h=2). In both cases, the inv_h value is 2.
  uint32_t inv_h0 = max_incl_h / h0;
  uint32_t inv_h1 = max_incl_h / h1;
  uint32_t inv_h2 = max_incl_h / h2;
  uint32_t inv_v0 = max_incl_v / v0;
  uint32_t inv_v1 = max_incl_v / v1;
  uint32_t inv_v2 = max_incl_v / v2;

  // These are computed from the requested (not yet clamped) dimensions.
  uint32_t half_width_for_2to1 = (width + 1u) / 2u;
  uint32_t half_height_for_2to1 = (height + 1u) / 2u;

  // Clamp the output dimensions. The widthN and heightN arguments have no
  // upper bound, so the multiplications saturate. A plain uint32_t multiply
  // could wrap around to a small value and wrongly shrink the output.
  width = wuffs_base__u32__min_of_5(
      width,
      wuffs_base__pixel_swizzler__swizzle_ycck__scaled_extent(width0, inv_h0),
      wuffs_base__pixel_swizzler__swizzle_ycck__scaled_extent(width1, inv_h1),
      wuffs_base__pixel_swizzler__swizzle_ycck__scaled_extent(width2, inv_h2),
      wuffs_base__pixel_config__width(&dst->pixcfg));
  height = wuffs_base__u32__min_of_5(
      height,
      wuffs_base__pixel_swizzler__swizzle_ycck__scaled_extent(height0, inv_v0),
      wuffs_base__pixel_swizzler__swizzle_ycck__scaled_extent(height1, inv_v1),
      wuffs_base__pixel_swizzler__swizzle_ycck__scaled_extent(height2, inv_v2),
      wuffs_base__pixel_config__height(&dst->pixcfg));

  // Reject sampling factors that do not evenly divide the maximum and reject
  // source slices that are too short for the clamped dimensions. The length
  // checks are what keep the filter functions' reads in bounds.
  if (((h0 * inv_h0) != max_incl_h) ||  //
      ((h1 * inv_h1) != max_incl_h) ||  //
      ((h2 * inv_h2) != max_incl_h) ||  //
      ((v0 * inv_v0) != max_incl_v) ||  //
      ((v1 * inv_v1) != max_incl_v) ||  //
      ((v2 * inv_v2) != max_incl_v) ||  //
      (src0.len < wuffs_base__pixel_swizzler__flattened_length(
                      width, height, stride0, inv_h0, inv_v0)) ||
      (src1.len < wuffs_base__pixel_swizzler__flattened_length(
                      width, height, stride1, inv_h1, inv_v1)) ||
      (src2.len < wuffs_base__pixel_swizzler__flattened_length(
                      width, height, stride2, inv_h2, inv_v2))) {
    return wuffs_base__make_status(wuffs_base__error__bad_argument);
  }

  if (wuffs_base__pixel_format__is_planar(&dst->pixcfg.private_impl.pixfmt)) {
    // TODO: see wuffs_base__pixel_buffer__set_color_u32_at's TODO.
    return wuffs_base__make_status(
        wuffs_base__error__unsupported_pixel_swizzler_option);
  }

  switch (dst->pixcfg.private_impl.pixfmt.repr) {
    case WUFFS_BASE__PIXEL_FORMAT__Y:
    case WUFFS_BASE__PIXEL_FORMAT__Y_16LE:
    case WUFFS_BASE__PIXEL_FORMAT__Y_16BE:
    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_NONPREMUL:
    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_PREMUL:
    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY:
    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
    case WUFFS_BASE__PIXEL_FORMAT__BGR:
    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
    case WUFFS_BASE__PIXEL_FORMAT__BGRX:
    case WUFFS_BASE__PIXEL_FORMAT__RGB:
    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:
    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:
    case WUFFS_BASE__PIXEL_FORMAT__RGBX:
      break;

    default:
      // TODO: see wuffs_base__pixel_buffer__set_color_u32_at's TODO.
      return wuffs_base__make_status(
          wuffs_base__error__unsupported_pixel_swizzler_option);
  }

  // The filter functions require positive dimensions. An empty output is a
  // successful no-op.
  if ((width <= 0u) || (height <= 0u)) {
    return wuffs_base__make_status(NULL);
  }

  if (triangle_filter_for_2to1 &&
      (wuffs_base__pixel_swizzler__has_triangle_upsampler(inv_h0, inv_v0) ||
       wuffs_base__pixel_swizzler__has_triangle_upsampler(inv_h1, inv_v1) ||
       wuffs_base__pixel_swizzler__has_triangle_upsampler(inv_h2, inv_v2))) {
    wuffs_base__pixel_swizzler__swizzle_ycc__general__triangle_filter(
        dst, width, height,                         //
        src0.ptr, src1.ptr, src2.ptr,               //
        stride0, stride1, stride2,                  //
        inv_h0, inv_h1, inv_h2,                     //
        inv_v0, inv_v1, inv_v2,                     //
        half_width_for_2to1, half_height_for_2to1,  //
        scratch_buffer_2k.ptr);

  } else {
    wuffs_base__pixel_swizzler__swizzle_ycc__general__box_filter(
        dst, width, height,            //
        src0.ptr, src1.ptr, src2.ptr,  //
        stride0, stride1, stride2,     //
        inv_h0, inv_h1, inv_h2,        //
        inv_v0, inv_v1, inv_v2);
  }

  return wuffs_base__make_status(NULL);
}