Prepare to spin out base submodules
diff --git a/internal/cgen/base/all-impl.c b/internal/cgen/base/all-impl.c
index 0ba6e6e..894207d 100644
--- a/internal/cgen/base/all-impl.c
+++ b/internal/cgen/base/all-impl.c
@@ -118,6 +118,20 @@
 #endif  // !defined(WUFFS_CONFIG__MODULES) ||
         // defined(WUFFS_CONFIG__MODULE__BASE)
 
+#if !defined(WUFFS_CONFIG__MODULES) || defined(WUFFS_CONFIG__MODULE__BASE)
+
+// !! INSERT base/f64conv-submodule.c.
+
+#endif  // !defined(WUFFS_CONFIG__MODULES) ||
+        // defined(WUFFS_CONFIG__MODULE__BASE)
+
+#if !defined(WUFFS_CONFIG__MODULES) || defined(WUFFS_CONFIG__MODULE__BASE)
+
+// !! INSERT base/pixconv-submodule.c.
+
+#endif  // !defined(WUFFS_CONFIG__MODULES) ||
+        // defined(WUFFS_CONFIG__MODULE__BASE)
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/internal/cgen/base/f64conv-submodule.c b/internal/cgen/base/f64conv-submodule.c
new file mode 100644
index 0000000..3bea12c
--- /dev/null
+++ b/internal/cgen/base/f64conv-submodule.c
@@ -0,0 +1,1264 @@
+// After editing this file, run "go generate" in the parent directory.
+
+// Copyright 2020 The Wuffs Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// ---------------- IEEE 754 Floating Point
+
+#define WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE 1023
+#define WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION 500
+
+// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL is the largest N
+// such that ((10 << N) < (1 << 64)).
+#define WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL 60
+
+// wuffs_base__private_implementation__high_prec_dec (abbreviated as HPD) is a
+// fixed precision floating point decimal number, augmented with ±infinity
+// values, but it cannot represent NaN (Not a Number).
+//
+// "High precision" means that the mantissa holds 500 decimal digits. 500 is
+// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION.
+//
+// An HPD isn't for general purpose arithmetic, only for conversions to and
+// from IEEE 754 double-precision floating point, where the largest and
+// smallest positive, finite values are approximately 1.8e+308 and 4.9e-324.
+// HPD exponents above +1023 mean infinity, below -1023 mean zero. The ±1023
+// bounds are further away from zero than ±(324 + 500), where 500 and 1023 are
+// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION and
+// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE.
+//
+// digits[.. num_digits] are the number's digits in big-endian order. The
+// uint8_t values are in the range [0 ..= 9], not ['0' ..= '9'], where e.g. '7'
+// is the ASCII value 0x37.
+//
+// decimal_point is the index (within digits) of the decimal point. It may be
+// negative or be larger than num_digits, in which case the explicit digits are
+// padded with implicit zeroes.
+//
+// For example, if num_digits is 3 and digits is "\x07\x08\x09":
+//   - A decimal_point of -2 means ".00789"
+//   - A decimal_point of -1 means ".0789"
+//   - A decimal_point of +0 means ".789"
+//   - A decimal_point of +1 means "7.89"
+//   - A decimal_point of +2 means "78.9"
+//   - A decimal_point of +3 means "789."
+//   - A decimal_point of +4 means "7890."
+//   - A decimal_point of +5 means "78900."
+//
+// As above, a decimal_point higher than +1023 means that the overall value is
+// infinity, lower than -1023 means zero.
+//
+// negative is a sign bit. An HPD can distinguish positive and negative zero.
+//
+// truncated is whether there are more than
+// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION digits, and at
+// least one of those extra digits is non-zero. The existence of long-tail
+// digits can affect rounding.
+//
+// The "all fields are zero" value is valid, and represents the number +0.
+typedef struct {
+  uint32_t num_digits;
+  int32_t decimal_point;
+  bool negative;
+  bool truncated;
+  uint8_t digits[WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION];
+} wuffs_base__private_implementation__high_prec_dec;
+
+// wuffs_base__private_implementation__high_prec_dec__trim trims trailing
+// zeroes from the h->digits[.. h->num_digits] slice. They have no benefit,
+// since we explicitly track h->decimal_point.
+//
+// Preconditions:
+//  - h is non-NULL.
+static inline void  //
+wuffs_base__private_implementation__high_prec_dec__trim(
+    wuffs_base__private_implementation__high_prec_dec* h) {
+  while ((h->num_digits > 0) && (h->digits[h->num_digits - 1] == 0)) {
+    h->num_digits--;
+  }
+}
+
+static wuffs_base__status  //
+wuffs_base__private_implementation__high_prec_dec__parse(
+    wuffs_base__private_implementation__high_prec_dec* h,
+    wuffs_base__slice_u8 s) {
+  if (!h) {
+    return wuffs_base__make_status(wuffs_base__error__bad_receiver);
+  }
+  h->num_digits = 0;
+  h->decimal_point = 0;
+  h->negative = false;
+  h->truncated = false;
+
+  uint8_t* p = s.ptr;
+  uint8_t* q = s.ptr + s.len;
+
+  for (; (p < q) && (*p == '_'); p++) {
+  }
+  if (p >= q) {
+    return wuffs_base__make_status(wuffs_base__error__bad_argument);
+  }
+
+  // Parse sign.
+  do {
+    if (*p == '+') {
+      p++;
+    } else if (*p == '-') {
+      h->negative = true;
+      p++;
+    } else {
+      break;
+    }
+    for (; (p < q) && (*p == '_'); p++) {
+    }
+  } while (0);
+
+  // Parse digits.
+  uint32_t nd = 0;
+  int32_t dp = 0;
+  bool saw_digits = false;
+  bool saw_non_zero_digits = false;
+  bool saw_dot = false;
+  for (; p < q; p++) {
+    if (*p == '_') {
+      // No-op.
+
+    } else if ((*p == '.') || (*p == ',')) {
+      // As per https://en.wikipedia.org/wiki/Decimal_separator, both '.' and
+      // ',' are commonly used. We just parse either, regardless of LOCALE.
+      if (saw_dot) {
+        return wuffs_base__make_status(wuffs_base__error__bad_argument);
+      }
+      saw_dot = true;
+      dp = (int32_t)nd;
+
+    } else if ('0' == *p) {
+      if (!saw_dot && !saw_non_zero_digits && saw_digits) {
+        // We don't allow unnecessary leading zeroes: "000123" or "0644".
+        return wuffs_base__make_status(wuffs_base__error__bad_argument);
+      }
+      saw_digits = true;
+      if (nd == 0) {
+        // Track leading zeroes implicitly.
+        dp--;
+      } else if (nd <
+                 WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
+        h->digits[nd++] = 0;
+      } else {
+        // Long-tail zeroes are ignored.
+      }
+
+    } else if (('0' < *p) && (*p <= '9')) {
+      if (!saw_dot && !saw_non_zero_digits && saw_digits) {
+        // We don't allow unnecessary leading zeroes: "000123" or "0644".
+        return wuffs_base__make_status(wuffs_base__error__bad_argument);
+      }
+      saw_digits = true;
+      saw_non_zero_digits = true;
+      if (nd < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
+        h->digits[nd++] = (uint8_t)(*p - '0');
+      } else {
+        // Long-tail non-zeroes set the truncated bit.
+        h->truncated = true;
+      }
+
+    } else {
+      break;
+    }
+  }
+
+  if (!saw_digits) {
+    return wuffs_base__make_status(wuffs_base__error__bad_argument);
+  }
+  if (!saw_dot) {
+    dp = (int32_t)nd;
+  }
+
+  // Parse exponent.
+  if ((p < q) && ((*p == 'E') || (*p == 'e'))) {
+    p++;
+    for (; (p < q) && (*p == '_'); p++) {
+    }
+    if (p >= q) {
+      return wuffs_base__make_status(wuffs_base__error__bad_argument);
+    }
+
+    int32_t exp_sign = +1;
+    if (*p == '+') {
+      p++;
+    } else if (*p == '-') {
+      exp_sign = -1;
+      p++;
+    }
+
+    int32_t exp = 0;
+    const int32_t exp_large =
+        WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE +
+        WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION;
+    bool saw_exp_digits = false;
+    for (; p < q; p++) {
+      if (*p == '_') {
+        // No-op.
+      } else if (('0' <= *p) && (*p <= '9')) {
+        saw_exp_digits = true;
+        if (exp < exp_large) {
+          exp = (10 * exp) + ((int32_t)(*p - '0'));
+        }
+      } else {
+        break;
+      }
+    }
+    if (!saw_exp_digits) {
+      return wuffs_base__make_status(wuffs_base__error__bad_argument);
+    }
+    dp += exp_sign * exp;
+  }
+
+  // Finish.
+  if (p != q) {
+    return wuffs_base__make_status(wuffs_base__error__bad_argument);
+  }
+  h->num_digits = nd;
+  if (nd == 0) {
+    h->decimal_point = 0;
+  } else if (dp <
+             -WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {
+    h->decimal_point =
+        -WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE - 1;
+  } else if (dp >
+             +WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {
+    h->decimal_point =
+        +WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE + 1;
+  } else {
+    h->decimal_point = dp;
+  }
+  wuffs_base__private_implementation__high_prec_dec__trim(h);
+  return wuffs_base__make_status(NULL);
+}
+
+// --------
+
+// The etc__hpd_left_shift and etc__powers_of_5 tables were printed by
+// script/print-hpd-left-shift.go. That script has an optional -comments flag,
+// whose output is not copied here, which prints further detail.
+//
+// These tables are used in
+// wuffs_base__private_implementation__high_prec_dec__lshift_num_new_digits.
+
+// wuffs_base__private_implementation__hpd_left_shift[i] encodes the number of
+// new digits created after multiplying a positive integer by (1 << i): the
+// additional length in the decimal representation. For example, shifting "234"
+// by 3 (equivalent to multiplying by 8) will produce "1872". Going from a
+// 3-length string to a 4-length string means that 1 new digit was added (and
+// existing digits may have changed).
+//
+// Shifting by i can add either N or N-1 new digits, depending on whether the
+// original positive integer compares >= or < to the i'th power of 5 (as 10
+// equals 2 * 5). Comparison is lexicographic, not numerical.
+//
+// For example, shifting by 4 (i.e. multiplying by 16) can add 1 or 2 new
+// digits, depending on a lexicographic comparison to (5 ** 4), i.e. "625":
+//  - ("1"      << 4) is "16",       which adds 1 new digit.
+//  - ("5678"   << 4) is "90848",    which adds 1 new digit.
+//  - ("624"    << 4) is "9984",     which adds 1 new digit.
+//  - ("62498"  << 4) is "999968",   which adds 1 new digit.
+//  - ("625"    << 4) is "10000",    which adds 2 new digits.
+//  - ("625001" << 4) is "10000016", which adds 2 new digits.
+//  - ("7008"   << 4) is "112128",   which adds 2 new digits.
+//  - ("99"     << 4) is "1584",     which adds 2 new digits.
+//
+// Thus, when i is 4, N is 2 and (5 ** i) is "625". This etc__hpd_left_shift
+// array encodes this as:
+//  - etc__hpd_left_shift[4] is 0x1006 = (2 << 11) | 0x0006.
+//  - etc__hpd_left_shift[5] is 0x1009 = (? << 11) | 0x0009.
+// where the ? isn't relevant for i == 4.
+//
+// The high 5 bits of etc__hpd_left_shift[i] is N, the higher of the two
+// possible number of new digits. The low 11 bits are an offset into the
+// etc__powers_of_5 array (of length 0x051C, so offsets fit in 11 bits). When i
+// is 4, its offset and the next one are 6 and 9, and etc__powers_of_5[6 .. 9]
+// is the string "\x06\x02\x05", so the relevant power of 5 is "625".
+//
+// Thanks to Ken Thompson for the original idea.
+static const uint16_t wuffs_base__private_implementation__hpd_left_shift[65] = {
+    0x0000, 0x0800, 0x0801, 0x0803, 0x1006, 0x1009, 0x100D, 0x1812, 0x1817,
+    0x181D, 0x2024, 0x202B, 0x2033, 0x203C, 0x2846, 0x2850, 0x285B, 0x3067,
+    0x3073, 0x3080, 0x388E, 0x389C, 0x38AB, 0x38BB, 0x40CC, 0x40DD, 0x40EF,
+    0x4902, 0x4915, 0x4929, 0x513E, 0x5153, 0x5169, 0x5180, 0x5998, 0x59B0,
+    0x59C9, 0x61E3, 0x61FD, 0x6218, 0x6A34, 0x6A50, 0x6A6D, 0x6A8B, 0x72AA,
+    0x72C9, 0x72E9, 0x7B0A, 0x7B2B, 0x7B4D, 0x8370, 0x8393, 0x83B7, 0x83DC,
+    0x8C02, 0x8C28, 0x8C4F, 0x9477, 0x949F, 0x94C8, 0x9CF2, 0x051C, 0x051C,
+    0x051C, 0x051C,
+};
+
+// wuffs_base__private_implementation__powers_of_5 contains the powers of 5,
+// concatenated together: "5", "25", "125", "625", "3125", etc.
+static const uint8_t wuffs_base__private_implementation__powers_of_5[0x051C] = {
+    5, 2, 5, 1, 2, 5, 6, 2, 5, 3, 1, 2, 5, 1, 5, 6, 2, 5, 7, 8, 1, 2, 5, 3, 9,
+    0, 6, 2, 5, 1, 9, 5, 3, 1, 2, 5, 9, 7, 6, 5, 6, 2, 5, 4, 8, 8, 2, 8, 1, 2,
+    5, 2, 4, 4, 1, 4, 0, 6, 2, 5, 1, 2, 2, 0, 7, 0, 3, 1, 2, 5, 6, 1, 0, 3, 5,
+    1, 5, 6, 2, 5, 3, 0, 5, 1, 7, 5, 7, 8, 1, 2, 5, 1, 5, 2, 5, 8, 7, 8, 9, 0,
+    6, 2, 5, 7, 6, 2, 9, 3, 9, 4, 5, 3, 1, 2, 5, 3, 8, 1, 4, 6, 9, 7, 2, 6, 5,
+    6, 2, 5, 1, 9, 0, 7, 3, 4, 8, 6, 3, 2, 8, 1, 2, 5, 9, 5, 3, 6, 7, 4, 3, 1,
+    6, 4, 0, 6, 2, 5, 4, 7, 6, 8, 3, 7, 1, 5, 8, 2, 0, 3, 1, 2, 5, 2, 3, 8, 4,
+    1, 8, 5, 7, 9, 1, 0, 1, 5, 6, 2, 5, 1, 1, 9, 2, 0, 9, 2, 8, 9, 5, 5, 0, 7,
+    8, 1, 2, 5, 5, 9, 6, 0, 4, 6, 4, 4, 7, 7, 5, 3, 9, 0, 6, 2, 5, 2, 9, 8, 0,
+    2, 3, 2, 2, 3, 8, 7, 6, 9, 5, 3, 1, 2, 5, 1, 4, 9, 0, 1, 1, 6, 1, 1, 9, 3,
+    8, 4, 7, 6, 5, 6, 2, 5, 7, 4, 5, 0, 5, 8, 0, 5, 9, 6, 9, 2, 3, 8, 2, 8, 1,
+    2, 5, 3, 7, 2, 5, 2, 9, 0, 2, 9, 8, 4, 6, 1, 9, 1, 4, 0, 6, 2, 5, 1, 8, 6,
+    2, 6, 4, 5, 1, 4, 9, 2, 3, 0, 9, 5, 7, 0, 3, 1, 2, 5, 9, 3, 1, 3, 2, 2, 5,
+    7, 4, 6, 1, 5, 4, 7, 8, 5, 1, 5, 6, 2, 5, 4, 6, 5, 6, 6, 1, 2, 8, 7, 3, 0,
+    7, 7, 3, 9, 2, 5, 7, 8, 1, 2, 5, 2, 3, 2, 8, 3, 0, 6, 4, 3, 6, 5, 3, 8, 6,
+    9, 6, 2, 8, 9, 0, 6, 2, 5, 1, 1, 6, 4, 1, 5, 3, 2, 1, 8, 2, 6, 9, 3, 4, 8,
+    1, 4, 4, 5, 3, 1, 2, 5, 5, 8, 2, 0, 7, 6, 6, 0, 9, 1, 3, 4, 6, 7, 4, 0, 7,
+    2, 2, 6, 5, 6, 2, 5, 2, 9, 1, 0, 3, 8, 3, 0, 4, 5, 6, 7, 3, 3, 7, 0, 3, 6,
+    1, 3, 2, 8, 1, 2, 5, 1, 4, 5, 5, 1, 9, 1, 5, 2, 2, 8, 3, 6, 6, 8, 5, 1, 8,
+    0, 6, 6, 4, 0, 6, 2, 5, 7, 2, 7, 5, 9, 5, 7, 6, 1, 4, 1, 8, 3, 4, 2, 5, 9,
+    0, 3, 3, 2, 0, 3, 1, 2, 5, 3, 6, 3, 7, 9, 7, 8, 8, 0, 7, 0, 9, 1, 7, 1, 2,
+    9, 5, 1, 6, 6, 0, 1, 5, 6, 2, 5, 1, 8, 1, 8, 9, 8, 9, 4, 0, 3, 5, 4, 5, 8,
+    5, 6, 4, 7, 5, 8, 3, 0, 0, 7, 8, 1, 2, 5, 9, 0, 9, 4, 9, 4, 7, 0, 1, 7, 7,
+    2, 9, 2, 8, 2, 3, 7, 9, 1, 5, 0, 3, 9, 0, 6, 2, 5, 4, 5, 4, 7, 4, 7, 3, 5,
+    0, 8, 8, 6, 4, 6, 4, 1, 1, 8, 9, 5, 7, 5, 1, 9, 5, 3, 1, 2, 5, 2, 2, 7, 3,
+    7, 3, 6, 7, 5, 4, 4, 3, 2, 3, 2, 0, 5, 9, 4, 7, 8, 7, 5, 9, 7, 6, 5, 6, 2,
+    5, 1, 1, 3, 6, 8, 6, 8, 3, 7, 7, 2, 1, 6, 1, 6, 0, 2, 9, 7, 3, 9, 3, 7, 9,
+    8, 8, 2, 8, 1, 2, 5, 5, 6, 8, 4, 3, 4, 1, 8, 8, 6, 0, 8, 0, 8, 0, 1, 4, 8,
+    6, 9, 6, 8, 9, 9, 4, 1, 4, 0, 6, 2, 5, 2, 8, 4, 2, 1, 7, 0, 9, 4, 3, 0, 4,
+    0, 4, 0, 0, 7, 4, 3, 4, 8, 4, 4, 9, 7, 0, 7, 0, 3, 1, 2, 5, 1, 4, 2, 1, 0,
+    8, 5, 4, 7, 1, 5, 2, 0, 2, 0, 0, 3, 7, 1, 7, 4, 2, 2, 4, 8, 5, 3, 5, 1, 5,
+    6, 2, 5, 7, 1, 0, 5, 4, 2, 7, 3, 5, 7, 6, 0, 1, 0, 0, 1, 8, 5, 8, 7, 1, 1,
+    2, 4, 2, 6, 7, 5, 7, 8, 1, 2, 5, 3, 5, 5, 2, 7, 1, 3, 6, 7, 8, 8, 0, 0, 5,
+    0, 0, 9, 2, 9, 3, 5, 5, 6, 2, 1, 3, 3, 7, 8, 9, 0, 6, 2, 5, 1, 7, 7, 6, 3,
+    5, 6, 8, 3, 9, 4, 0, 0, 2, 5, 0, 4, 6, 4, 6, 7, 7, 8, 1, 0, 6, 6, 8, 9, 4,
+    5, 3, 1, 2, 5, 8, 8, 8, 1, 7, 8, 4, 1, 9, 7, 0, 0, 1, 2, 5, 2, 3, 2, 3, 3,
+    8, 9, 0, 5, 3, 3, 4, 4, 7, 2, 6, 5, 6, 2, 5, 4, 4, 4, 0, 8, 9, 2, 0, 9, 8,
+    5, 0, 0, 6, 2, 6, 1, 6, 1, 6, 9, 4, 5, 2, 6, 6, 7, 2, 3, 6, 3, 2, 8, 1, 2,
+    5, 2, 2, 2, 0, 4, 4, 6, 0, 4, 9, 2, 5, 0, 3, 1, 3, 0, 8, 0, 8, 4, 7, 2, 6,
+    3, 3, 3, 6, 1, 8, 1, 6, 4, 0, 6, 2, 5, 1, 1, 1, 0, 2, 2, 3, 0, 2, 4, 6, 2,
+    5, 1, 5, 6, 5, 4, 0, 4, 2, 3, 6, 3, 1, 6, 6, 8, 0, 9, 0, 8, 2, 0, 3, 1, 2,
+    5, 5, 5, 5, 1, 1, 1, 5, 1, 2, 3, 1, 2, 5, 7, 8, 2, 7, 0, 2, 1, 1, 8, 1, 5,
+    8, 3, 4, 0, 4, 5, 4, 1, 0, 1, 5, 6, 2, 5, 2, 7, 7, 5, 5, 5, 7, 5, 6, 1, 5,
+    6, 2, 8, 9, 1, 3, 5, 1, 0, 5, 9, 0, 7, 9, 1, 7, 0, 2, 2, 7, 0, 5, 0, 7, 8,
+    1, 2, 5, 1, 3, 8, 7, 7, 7, 8, 7, 8, 0, 7, 8, 1, 4, 4, 5, 6, 7, 5, 5, 2, 9,
+    5, 3, 9, 5, 8, 5, 1, 1, 3, 5, 2, 5, 3, 9, 0, 6, 2, 5, 6, 9, 3, 8, 8, 9, 3,
+    9, 0, 3, 9, 0, 7, 2, 2, 8, 3, 7, 7, 6, 4, 7, 6, 9, 7, 9, 2, 5, 5, 6, 7, 6,
+    2, 6, 9, 5, 3, 1, 2, 5, 3, 4, 6, 9, 4, 4, 6, 9, 5, 1, 9, 5, 3, 6, 1, 4, 1,
+    8, 8, 8, 2, 3, 8, 4, 8, 9, 6, 2, 7, 8, 3, 8, 1, 3, 4, 7, 6, 5, 6, 2, 5, 1,
+    7, 3, 4, 7, 2, 3, 4, 7, 5, 9, 7, 6, 8, 0, 7, 0, 9, 4, 4, 1, 1, 9, 2, 4, 4,
+    8, 1, 3, 9, 1, 9, 0, 6, 7, 3, 8, 2, 8, 1, 2, 5, 8, 6, 7, 3, 6, 1, 7, 3, 7,
+    9, 8, 8, 4, 0, 3, 5, 4, 7, 2, 0, 5, 9, 6, 2, 2, 4, 0, 6, 9, 5, 9, 5, 3, 3,
+    6, 9, 1, 4, 0, 6, 2, 5,
+};
+
+// wuffs_base__private_implementation__high_prec_dec__lshift_num_new_digits
+// returns the number of additional decimal digits when left-shifting by shift.
+//
+// See below for preconditions.
+static uint32_t  //
+wuffs_base__private_implementation__high_prec_dec__lshift_num_new_digits(
+    wuffs_base__private_implementation__high_prec_dec* h,
+    uint32_t shift) {
+  // Masking with 0x3F should be unnecessary (assuming the preconditions) but
+  // it's cheap and ensures that we don't overflow the
+  // wuffs_base__private_implementation__hpd_left_shift array.
+  shift &= 63;
+
+  uint32_t x_a = wuffs_base__private_implementation__hpd_left_shift[shift];
+  uint32_t x_b = wuffs_base__private_implementation__hpd_left_shift[shift + 1];
+  uint32_t num_new_digits = x_a >> 11;
+  uint32_t pow5_a = 0x7FF & x_a;
+  uint32_t pow5_b = 0x7FF & x_b;
+
+  const uint8_t* pow5 =
+      &wuffs_base__private_implementation__powers_of_5[pow5_a];
+  uint32_t i = 0;
+  uint32_t n = pow5_b - pow5_a;
+  for (; i < n; i++) {
+    if (i >= h->num_digits) {
+      return num_new_digits - 1;
+    } else if (h->digits[i] == pow5[i]) {
+      continue;
+    } else if (h->digits[i] < pow5[i]) {
+      return num_new_digits - 1;
+    } else {
+      return num_new_digits;
+    }
+  }
+  return num_new_digits;
+}
+
+// --------
+
+// wuffs_base__private_implementation__high_prec_dec__rounded_integer returns
+// the integral (non-fractional) part of h, provided that it is 18 or fewer
+// decimal digits. For 19 or more digits, it returns UINT64_MAX. Note that:
+//   - (1 << 53) is    9007199254740992, which has 16 decimal digits.
+//   - (1 << 56) is   72057594037927936, which has 17 decimal digits.
+//   - (1 << 59) is  576460752303423488, which has 18 decimal digits.
+//   - (1 << 63) is 9223372036854775808, which has 19 decimal digits.
+// and that IEEE 754 double precision has 52 mantissa bits.
+//
+// That integral part is rounded-to-even: rounding 7.5 or 8.5 both give 8.
+//
+// h's negative bit is ignored: rounding -8.6 returns 9.
+//
+// See below for preconditions.
+static uint64_t  //
+wuffs_base__private_implementation__high_prec_dec__rounded_integer(
+    wuffs_base__private_implementation__high_prec_dec* h) {
+  if ((h->num_digits == 0) || (h->decimal_point < 0)) {
+    return 0;
+  } else if (h->decimal_point > 18) {
+    return UINT64_MAX;
+  }
+
+  uint32_t dp = (uint32_t)(h->decimal_point);
+  uint64_t n = 0;
+  uint32_t i = 0;
+  for (; i < dp; i++) {
+    n = (10 * n) + ((i < h->num_digits) ? h->digits[i] : 0);
+  }
+
+  bool round_up = false;
+  if (dp < h->num_digits) {
+    round_up = h->digits[dp] >= 5;
+    if ((h->digits[dp] == 5) && (dp + 1 == h->num_digits)) {
+      // We are exactly halfway. If we're truncated, round up, otherwise round
+      // to even.
+      round_up = h->truncated ||  //
+                 ((dp > 0) && (1 & h->digits[dp - 1]));
+    }
+  }
+  if (round_up) {
+    n++;
+  }
+
+  return n;
+}
+
+// wuffs_base__private_implementation__high_prec_dec__small_xshift shifts h's
+// number (where 'x' is 'l' or 'r' for left or right) by a small shift value.
+//
+// Preconditions:
+//  - h is non-NULL.
+//  - h->decimal_point is "not extreme".
+//  - shift is non-zero.
+//  - shift is "a small shift".
+//
+// "Not extreme" means within
+// ±WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE.
+//
+// "A small shift" means not more than
+// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL.
+//
+// wuffs_base__private_implementation__high_prec_dec__rounded_integer and
+// wuffs_base__private_implementation__high_prec_dec__lshift_num_new_digits
+// have the same preconditions.
+
+static void  //
+wuffs_base__private_implementation__high_prec_dec__small_lshift(
+    wuffs_base__private_implementation__high_prec_dec* h,
+    uint32_t shift) {
+  if (h->num_digits == 0) {
+    return;
+  }
+  uint32_t num_new_digits =
+      wuffs_base__private_implementation__high_prec_dec__lshift_num_new_digits(
+          h, shift);
+  uint32_t rx = h->num_digits - 1;                   // Read  index.
+  uint32_t wx = h->num_digits - 1 + num_new_digits;  // Write index.
+  uint64_t n = 0;
+
+  // Repeat: pick up a digit, put down a digit, right to left.
+  while (((int32_t)rx) >= 0) {
+    n += ((uint64_t)(h->digits[rx])) << shift;
+    uint64_t quo = n / 10;
+    uint64_t rem = n - (10 * quo);
+    if (wx < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
+      h->digits[wx] = (uint8_t)rem;
+    } else if (rem > 0) {
+      h->truncated = true;
+    }
+    n = quo;
+    wx--;
+    rx--;
+  }
+
+  // Put down leading digits, right to left.
+  while (n > 0) {
+    uint64_t quo = n / 10;
+    uint64_t rem = n - (10 * quo);
+    if (wx < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
+      h->digits[wx] = (uint8_t)rem;
+    } else if (rem > 0) {
+      h->truncated = true;
+    }
+    n = quo;
+    wx--;
+  }
+
+  // Finish.
+  h->num_digits += num_new_digits;
+  if (h->num_digits >
+      WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
+    h->num_digits = WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION;
+  }
+  h->decimal_point += (int32_t)num_new_digits;
+  wuffs_base__private_implementation__high_prec_dec__trim(h);
+}
+
+static void  //
+wuffs_base__private_implementation__high_prec_dec__small_rshift(
+    wuffs_base__private_implementation__high_prec_dec* h,
+    uint32_t shift) {
+  uint32_t rx = 0;  // Read  index.
+  uint32_t wx = 0;  // Write index.
+  uint64_t n = 0;
+
+  // Pick up enough leading digits to cover the first shift.
+  while ((n >> shift) == 0) {
+    if (rx < h->num_digits) {
+      // Read a digit.
+      n = (10 * n) + h->digits[rx++];
+    } else if (n == 0) {
+      // h's number used to be zero and remains zero.
+      return;
+    } else {
+      // Read sufficient implicit trailing zeroes.
+      while ((n >> shift) == 0) {
+        n = 10 * n;
+        rx++;
+      }
+      break;
+    }
+  }
+  h->decimal_point -= ((int32_t)(rx - 1));
+  if (h->decimal_point <
+      -WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {
+    // After the shift, h's number is effectively zero.
+    h->num_digits = 0;
+    h->decimal_point = 0;
+    h->negative = false;
+    h->truncated = false;
+    return;
+  }
+
+  // Repeat: pick up a digit, put down a digit, left to right.
+  uint64_t mask = (((uint64_t)(1)) << shift) - 1;
+  while (rx < h->num_digits) {
+    uint8_t new_digit = ((uint8_t)(n >> shift));
+    n = (10 * (n & mask)) + h->digits[rx++];
+    h->digits[wx++] = new_digit;
+  }
+
+  // Put down trailing digits, left to right.
+  while (n > 0) {
+    uint8_t new_digit = ((uint8_t)(n >> shift));
+    n = 10 * (n & mask);
+    if (wx < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
+      h->digits[wx++] = new_digit;
+    } else if (new_digit > 0) {
+      h->truncated = true;
+    }
+  }
+
+  // Finish.
+  h->num_digits = wx;
+  wuffs_base__private_implementation__high_prec_dec__trim(h);
+}
+
+// --------
+
+// The wuffs_base__private_implementation__etc_powers_of_10 tables were printed
+// by script/print-mpb-powers-of-10.go. That script has an optional -comments
+// flag, whose output is not copied here, which prints further detail.
+//
+// These tables are used in
+// wuffs_base__private_implementation__medium_prec_bin__assign_from_hpd.
+
+// wuffs_base__private_implementation__big_powers_of_10 contains approximations
+// to the powers of 10, ranging from 1e-348 to 1e+340, with the exponent
+// stepping by 8: -348, -340, -332, ..., -12, -4, +4, +12, ..., +340. Each step
+// consists of three uint32_t elements. There are 87 triples, 87 * 3 = 261.
+//
+// For example, the third approximation, for 1e-332, consists of the uint32_t
+// triple (0x3055AC76, 0x8B16FB20, 0xFFFFFB72). The first two of that triple
+// are a little-endian uint64_t value: 0x8B16FB203055AC76. The last one is an
+// int32_t value: -1166. Together, they represent the approximation:
+//   1e-332 ≈ 0x8B16FB203055AC76 * (2 ** -1166)
+// Similarly, the (0x00000000, 0x9C400000, 0xFFFFFFCE) uint32_t triple means:
+//   1e+4   ≈ 0x9C40000000000000 * (2 **   -50)  // This approx'n is exact.
+// Similarly, the (0xD4C4FB27, 0xED63A231, 0x000000A2) uint32_t triple means:
+//   1e+68  ≈ 0xED63A231D4C4FB27 * (2 **   162)
+static const uint32_t
+    wuffs_base__private_implementation__big_powers_of_10[261] = {
+        0x081C0288, 0xFA8FD5A0, 0xFFFFFB3C, 0xA23EBF76, 0xBAAEE17F, 0xFFFFFB57,
+        0x3055AC76, 0x8B16FB20, 0xFFFFFB72, 0x5DCE35EA, 0xCF42894A, 0xFFFFFB8C,
+        0x55653B2D, 0x9A6BB0AA, 0xFFFFFBA7, 0x3D1A45DF, 0xE61ACF03, 0xFFFFFBC1,
+        0xC79AC6CA, 0xAB70FE17, 0xFFFFFBDC, 0xBEBCDC4F, 0xFF77B1FC, 0xFFFFFBF6,
+        0x416BD60C, 0xBE5691EF, 0xFFFFFC11, 0x907FFC3C, 0x8DD01FAD, 0xFFFFFC2C,
+        0x31559A83, 0xD3515C28, 0xFFFFFC46, 0xADA6C9B5, 0x9D71AC8F, 0xFFFFFC61,
+        0x23EE8BCB, 0xEA9C2277, 0xFFFFFC7B, 0x4078536D, 0xAECC4991, 0xFFFFFC96,
+        0x5DB6CE57, 0x823C1279, 0xFFFFFCB1, 0x4DFB5637, 0xC2109436, 0xFFFFFCCB,
+        0x3848984F, 0x9096EA6F, 0xFFFFFCE6, 0x25823AC7, 0xD77485CB, 0xFFFFFD00,
+        0x97BF97F4, 0xA086CFCD, 0xFFFFFD1B, 0x172AACE5, 0xEF340A98, 0xFFFFFD35,
+        0x2A35B28E, 0xB23867FB, 0xFFFFFD50, 0xD2C63F3B, 0x84C8D4DF, 0xFFFFFD6B,
+        0x1AD3CDBA, 0xC5DD4427, 0xFFFFFD85, 0xBB25C996, 0x936B9FCE, 0xFFFFFDA0,
+        0x7D62A584, 0xDBAC6C24, 0xFFFFFDBA, 0x0D5FDAF6, 0xA3AB6658, 0xFFFFFDD5,
+        0xDEC3F126, 0xF3E2F893, 0xFFFFFDEF, 0xAAFF80B8, 0xB5B5ADA8, 0xFFFFFE0A,
+        0x6C7C4A8B, 0x87625F05, 0xFFFFFE25, 0x34C13053, 0xC9BCFF60, 0xFFFFFE3F,
+        0x91BA2655, 0x964E858C, 0xFFFFFE5A, 0x70297EBD, 0xDFF97724, 0xFFFFFE74,
+        0xB8E5B88F, 0xA6DFBD9F, 0xFFFFFE8F, 0x88747D94, 0xF8A95FCF, 0xFFFFFEA9,
+        0x8FA89BCF, 0xB9447093, 0xFFFFFEC4, 0xBF0F156B, 0x8A08F0F8, 0xFFFFFEDF,
+        0x653131B6, 0xCDB02555, 0xFFFFFEF9, 0xD07B7FAC, 0x993FE2C6, 0xFFFFFF14,
+        0x2A2B3B06, 0xE45C10C4, 0xFFFFFF2E, 0x697392D3, 0xAA242499, 0xFFFFFF49,
+        0x8300CA0E, 0xFD87B5F2, 0xFFFFFF63, 0x92111AEB, 0xBCE50864, 0xFFFFFF7E,
+        0x6F5088CC, 0x8CBCCC09, 0xFFFFFF99, 0xE219652C, 0xD1B71758, 0xFFFFFFB3,
+        0x00000000, 0x9C400000, 0xFFFFFFCE, 0x00000000, 0xE8D4A510, 0xFFFFFFE8,
+        0xAC620000, 0xAD78EBC5, 0x00000003, 0xF8940984, 0x813F3978, 0x0000001E,
+        0xC90715B3, 0xC097CE7B, 0x00000038, 0x7BEA5C70, 0x8F7E32CE, 0x00000053,
+        0xABE98068, 0xD5D238A4, 0x0000006D, 0x179A2245, 0x9F4F2726, 0x00000088,
+        0xD4C4FB27, 0xED63A231, 0x000000A2, 0x8CC8ADA8, 0xB0DE6538, 0x000000BD,
+        0x1AAB65DB, 0x83C7088E, 0x000000D8, 0x42711D9A, 0xC45D1DF9, 0x000000F2,
+        0xA61BE758, 0x924D692C, 0x0000010D, 0x1A708DEA, 0xDA01EE64, 0x00000127,
+        0x9AEF774A, 0xA26DA399, 0x00000142, 0xB47D6B85, 0xF209787B, 0x0000015C,
+        0x79DD1877, 0xB454E4A1, 0x00000177, 0x5B9BC5C2, 0x865B8692, 0x00000192,
+        0xC8965D3D, 0xC83553C5, 0x000001AC, 0xFA97A0B3, 0x952AB45C, 0x000001C7,
+        0x99A05FE3, 0xDE469FBD, 0x000001E1, 0xDB398C25, 0xA59BC234, 0x000001FC,
+        0xA3989F5C, 0xF6C69A72, 0x00000216, 0x54E9BECE, 0xB7DCBF53, 0x00000231,
+        0xF22241E2, 0x88FCF317, 0x0000024C, 0xD35C78A5, 0xCC20CE9B, 0x00000266,
+        0x7B2153DF, 0x98165AF3, 0x00000281, 0x971F303A, 0xE2A0B5DC, 0x0000029B,
+        0x5CE3B396, 0xA8D9D153, 0x000002B6, 0xA4A7443C, 0xFB9B7CD9, 0x000002D0,
+        0xA7A44410, 0xBB764C4C, 0x000002EB, 0xB6409C1A, 0x8BAB8EEF, 0x00000306,
+        0xA657842C, 0xD01FEF10, 0x00000320, 0xE9913129, 0x9B10A4E5, 0x0000033B,
+        0xA19C0C9D, 0xE7109BFB, 0x00000355, 0x623BF429, 0xAC2820D9, 0x00000370,
+        0x7AA7CF85, 0x80444B5E, 0x0000038B, 0x03ACDD2D, 0xBF21E440, 0x000003A5,
+        0x5E44FF8F, 0x8E679C2F, 0x000003C0, 0x9C8CB841, 0xD433179D, 0x000003DA,
+        0xB4E31BA9, 0x9E19DB92, 0x000003F5, 0xBADF77D9, 0xEB96BF6E, 0x0000040F,
+        0x9BF0EE6B, 0xAF87023B, 0x0000042A,
+};
+
+// wuffs_base__private_implementation__small_powers_of_10 contains
+// approximations to the powers of 10, ranging from 1e+0 to 1e+7, with the
+// exponent stepping by 1. Each step consists of three uint32_t elements.
+//
+// For example, the third approximation, for 1e+2, consists of the uint32_t
+// triple (0x00000000, 0xC8000000, 0xFFFFFFC7). The first two of that triple
+// are a little-endian uint64_t value: 0xC800000000000000. The last one is an
+// int32_t value: -57. Together, they represent the approximation:
+//   1e+2   ≈ 0xC800000000000000 * (2 **   -57)  // This approx'n is exact.
+// Similarly, the (0x00000000, 0x9C400000, 0xFFFFFFCE) uint32_t triple means:
+//   1e+4   ≈ 0x9C40000000000000 * (2 **   -50)  // This approx'n is exact.
+static const uint32_t
+    wuffs_base__private_implementation__small_powers_of_10[24] = {
+        0x00000000, 0x80000000, 0xFFFFFFC1, 0x00000000, 0xA0000000, 0xFFFFFFC4,
+        0x00000000, 0xC8000000, 0xFFFFFFC7, 0x00000000, 0xFA000000, 0xFFFFFFCA,
+        0x00000000, 0x9C400000, 0xFFFFFFCE, 0x00000000, 0xC3500000, 0xFFFFFFD1,
+        0x00000000, 0xF4240000, 0xFFFFFFD4, 0x00000000, 0x98968000, 0xFFFFFFD8,
+};
+
+// wuffs_base__private_implementation__f64_powers_of_10 holds powers of 10 that
+// can be exactly represented by a float64 (what C calls a double).
+static const double wuffs_base__private_implementation__f64_powers_of_10[23] = {
+    1e0,  1e1,  1e2,  1e3,  1e4,  1e5,  1e6,  1e7,  1e8,  1e9,  1e10, 1e11,
+    1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22,
+};
+
+// --------
+
+// wuffs_base__private_implementation__medium_prec_bin (abbreviated as MPB) is
+// a fixed precision floating point binary number. Unlike IEEE 754 Floating
+// Point, it cannot represent infinity or NaN (Not a Number).
+//
+// "Medium precision" means that the mantissa holds 64 binary digits, a little
+// more than "double precision", and sizeof(MPB) > sizeof(double). 64 is
+// obviously the number of bits in a uint64_t.
+//
+// An MPB isn't for general purpose arithmetic, only for conversions to and
+// from IEEE 754 double-precision floating point.
+//
+// There is no implicit mantissa bit. The mantissa field is zero if and only if
+// the overall floating point value is ±0. An MPB is normalized if the mantissa
+// is zero or its high bit (the 1<<63 bit) is set.
+//
+// There is no negative bit. An MPB can only represent non-negative numbers.
+//
+// The "all fields are zero" value is valid, and represents the number +0.
+//
+// This is the "Do It Yourself Floating Point" data structure from Loitsch,
+// "Printing Floating-Point Numbers Quickly and Accurately with Integers"
+// (https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf).
+//
+// Florian Loitsch is also the primary contributor to
+// https://github.com/google/double-conversion
+typedef struct {
+  uint64_t mantissa;
+  int32_t exp2;
+} wuffs_base__private_implementation__medium_prec_bin;
+
+static uint32_t  //
+wuffs_base__private_implementation__medium_prec_bin__normalize(
+    wuffs_base__private_implementation__medium_prec_bin* m) {
+  uint32_t num_lz = 0;  // Left shift applied (and returned); 0 if m is zero.
+  if (m->mantissa != 0) {
+    num_lz = wuffs_base__count_leading_zeroes_u64(m->mantissa);
+    m->mantissa <<= num_lz;
+    m->exp2 -= (int32_t)num_lz;  // Compensate: the value itself is unchanged.
+  }
+  return num_lz;
+}
+
+// wuffs_base__private_implementation__medium_prec_bin__mul_pow_10 multiplies
+// m, in place, by a power of 10. That power is held in the three uint32_t
+// values starting at p (an etc_powers_of_10 triple): the low and high halves
+// of a 64-bit mantissa followed by a base-2 exponent.
+//
+// The result is rounded, but not necessarily normalized.
+//
+// Preconditions:
+//  - m is non-NULL.
+//  - m->mantissa is non-zero and its high bit is set (i.e. m is normalized).
+//  - The etc_powers_of_10 triple is already normalized.
+static void  //
+wuffs_base__private_implementation__medium_prec_bin__mul_pow_10(
+    wuffs_base__private_implementation__medium_prec_bin* m,
+    const uint32_t* p) {
+  uint64_t pow_mantissa = (((uint64_t)p[1]) << 32) | ((uint64_t)p[0]);
+  int32_t pow_exp2 = (int32_t)p[2];
+  wuffs_base__multiply_u64__output product =
+      wuffs_base__multiply_u64(m->mantissa, pow_mantissa);
+
+  // Keep the product's high half, rounding up if the low half's top bit is
+  // set. That cannot overflow: product.hi is at most 0xFFFFFFFFFFFFFFFE.
+  m->mantissa = product.hi + (product.lo >> 63);
+  m->exp2 = m->exp2 + pow_exp2 + 64;
+}
+
+// wuffs_base__private_implementation__medium_prec_bin__as_f64 converts m to a
+// float64 (what C calls a double). The negative argument sets the sign bit.
+// Too-large values become infinity; too-small ones become subnormal or zero.
+//
+// Preconditions:
+//  - m is non-NULL.
+//  - m->mantissa is non-zero and its high bit is set (i.e. m is normalized).
+static double  //
+wuffs_base__private_implementation__medium_prec_bin__as_f64(
+    const wuffs_base__private_implementation__medium_prec_bin* m,
+    bool negative) {
+  uint64_t mantissa64 = m->mantissa;
+  // An mpb's mantissa has the implicit (binary) decimal point at the right
+  // hand end of the mantissa's explicit digits. A double-precision's mantissa
+  // has that decimal point near the left hand end. There's also an explicit
+  // versus implicit leading 1 bit (binary digit). Together, the difference in
+  // semantics corresponds to adding 63.
+  int32_t exp2 = m->exp2 + 63;
+
+  // Ensure that exp2 is at least -1022, the minimum double-precision exponent
+  // for normal (as opposed to subnormal) numbers.
+  if (-1022 > exp2) {
+    uint32_t n = (uint32_t)(-1022 - exp2);
+    mantissa64 = (n < 64) ? (mantissa64 >> n) : 0;  // A shift >= 64 is UB in C.
+    exp2 += (int32_t)n;
+  }
+
+  // Extract the (1 + 52) bits from the 64-bit mantissa64. 52 is the number of
+  // explicit mantissa bits in a double-precision f64.
+  //
+  // Before, we have 64 bits and due to normalization, the high bit 'H' is 1.
+  // 63        55        47       etc     15        7
+  // H210_9876_5432_1098_7654_etc_etc_etc_5432_1098_7654_3210
+  // ++++_++++_++++_++++_++++_etc_etc_etc_++++_+..._...._....  Kept bits.
+  // ...._...._...H_2109_8765_etc_etc_etc_6543_2109_8765_4321  After shifting.
+  // After, we have 53 bits (and bit #52 is this 'H' bit).
+  uint64_t mantissa53 = mantissa64 >> 11;
+
+  // Round up if the old bit #10 (the highest bit dropped by shifting) was set.
+  // We also fix any overflow from rounding up.
+  if (mantissa64 & 1024) {
+    mantissa53++;
+    if ((mantissa53 >> 53) != 0) {
+      mantissa53 >>= 1;
+      exp2++;
+    }
+  }
+
+  // Handle double-precision infinity (a nominal exponent of 1024) and
+  // subnormals (an exponent of -1023 and no implicit mantissa bit, bit #52).
+  if (exp2 >= 1024) {
+    mantissa53 = 0;
+    exp2 = 1024;
+  } else if ((mantissa53 >> 52) == 0) {
+    exp2 = -1023;
+  }
+
+  // Pack the bits and return.
+  const int32_t f64_bias = -1023;
+  uint64_t exp2_bits =
+      (uint64_t)((exp2 - f64_bias) & 0x07FF);           // (1 << 11) - 1.
+  uint64_t bits = (mantissa53 & 0x000FFFFFFFFFFFFF) |   // (1 << 52) - 1.
+                  (exp2_bits << 52) |                   //
+                  (negative ? 0x8000000000000000 : 0);  // (1 << 63).
+  return wuffs_base__ieee_754_bit_representation__to_f64(bits);
+}
+
+// wuffs_base__private_implementation__medium_prec_bin__parse_number_f64
+// converts h (an HPD) to a float64, using m (an MPB) as scratch space. It
+// returns a NULL status.repr if there is no ambiguity in the truncation or
+// rounding to a float64 (an IEEE 754 double-precision floating point value).
+// Otherwise, it returns an error status.repr and a zero value, and m may have
+// been modified. A true skip_fast_path_for_tests disables the fast path.
+static wuffs_base__result_f64  //
+wuffs_base__private_implementation__medium_prec_bin__parse_number_f64(
+    wuffs_base__private_implementation__medium_prec_bin* m,
+    const wuffs_base__private_implementation__high_prec_dec* h,
+    bool skip_fast_path_for_tests) {
+  do {
+    // m->mantissa is a uint64_t, which is an integer approximation to a
+    // rational value - h's underlying digits after m's normalization. This
+    // error is an upper bound on the difference between the approximate and
+    // actual value.
+    //
+    // The DiyFpStrtod function in https://github.com/google/double-conversion
+    // uses a finer grain (1/8th of the ULP, Unit in the Last Place) when
+    // tracking error. This implementation is coarser (1 ULP) but simpler.
+    //
+    // It is an error in the "numerical approximation" sense, not in the
+    // typical programming sense (as in "bad input" or "a result type").
+    uint64_t error = 0;
+
+    // Convert up to 19 decimal digits (in h->digits) to 64 binary digits (in
+    // m->mantissa): (1e19 < (1<<64)) and ((1<<64) < 1e20). If we have more
+    // than 19 digits, we're truncating (with error).
+    uint32_t i;
+    uint32_t i_end = h->num_digits;
+    if (i_end > 19) {
+      i_end = 19;
+      error = 1;
+    }
+    uint64_t mantissa = 0;
+    for (i = 0; i < i_end; i++) {
+      mantissa = (10 * mantissa) + h->digits[i];
+    }
+    m->mantissa = mantissa;
+    m->exp2 = 0;
+
+    // Check that exp10 lies in the (big_powers_of_10 + small_powers_of_10)
+    // range, -348 ..= +347, stepping big_powers_of_10 by 8 (which is 87
+    // triples) and small_powers_of_10 by 1 (which is 8 triples).
+    int32_t exp10 = h->decimal_point - ((int32_t)(i_end));
+    if (exp10 < -348) {
+      goto fail;
+    }
+    uint32_t bpo10 = ((uint32_t)(exp10 + 348)) / 8;
+    uint32_t spo10 = ((uint32_t)(exp10 + 348)) % 8;
+    if (bpo10 >= 87) {
+      goto fail;
+    }
+
+    // Try a fast path, if float64 math would be exact.
+    //
+    // 15 is such that 1e15 can be losslessly represented in a float64
+    // mantissa: (1e15 < (1<<53)) and ((1<<53) < 1e16).
+    //
+    // 22 is the maximum valid index for the
+    // wuffs_base__private_implementation__f64_powers_of_10 array.
+    do {
+      if (skip_fast_path_for_tests || ((mantissa >> 52) != 0)) {
+        break;
+      }
+      double d = (double)mantissa;
+
+      if (exp10 == 0) {
+        wuffs_base__result_f64 ret;
+        ret.status.repr = NULL;
+        ret.value = h->negative ? -d : +d;
+        return ret;
+
+      } else if (exp10 > 0) {
+        if (exp10 > 22) {
+          if (exp10 > (15 + 22)) {
+            break;
+          }
+          // If exp10 is in the range 23 ..= 37, try moving a few of the zeroes
+          // from the exponent to the mantissa. If we're still under 1e15, we
+          // haven't truncated any mantissa bits.
+          d *= wuffs_base__private_implementation__f64_powers_of_10[exp10 - 22];
+          exp10 = 22;
+          if (d >= 1e15) {
+            break;
+          }
+        }
+        d *= wuffs_base__private_implementation__f64_powers_of_10[exp10];
+        wuffs_base__result_f64 ret;
+        ret.status.repr = NULL;
+        ret.value = h->negative ? -d : +d;
+        return ret;
+
+      } else {  // "if (exp10 < 0)" is effectively "if (true)" here.
+        if (exp10 < -22) {
+          break;
+        }
+        d /= wuffs_base__private_implementation__f64_powers_of_10[-exp10];
+        wuffs_base__result_f64 ret;
+        ret.status.repr = NULL;
+        ret.value = h->negative ? -d : +d;
+        return ret;
+      }
+    } while (0);
+
+    // Normalize (and scale the error).
+    error <<= wuffs_base__private_implementation__medium_prec_bin__normalize(m);
+
+    // Multiplying two MPB values nominally multiplies two mantissas, call them
+    // A and B, which are integer approximations to the precise values (A+a)
+    // and (B+b) for some error terms a and b.
+    //
+    // MPB multiplication calculates (((A+a) * (B+b)) >> 64) to be ((A*B) >>
+    // 64). Shifting (truncating) and rounding introduces further error. The
+    // difference between the calculated result:
+    //  ((A*B                  ) >> 64)
+    // and the true result:
+    //  ((A*B + A*b + a*B + a*b) >> 64)   + rounding_error
+    // is:
+    //  ((      A*b + a*B + a*b) >> 64)   + rounding_error
+    // which can be re-grouped as:
+    //  ((A*b) >> 64) + ((a*(B+b)) >> 64) + rounding_error
+    //
+    // Now, let A and a be "m->mantissa" and "error", and B and b be the
+    // pre-calculated power of 10. A and B are both less than (1 << 64), a is
+    // the "error" local variable and b is less than 1.
+    //
+    // An upper bound (in absolute value) on ((A*b) >> 64) is therefore 1.
+    //
+    // An upper bound on ((a*(B+b)) >> 64) is a, also known as error.
+    //
+    // Finally, the rounding_error is at most 1.
+    //
+    // In total, calling mpb__mul_pow_10 will raise the worst-case error by 2.
+    // The subsequent re-normalization can multiply that by a further factor.
+
+    // Multiply by small_powers_of_10[etc].
+    wuffs_base__private_implementation__medium_prec_bin__mul_pow_10(
+        m, &wuffs_base__private_implementation__small_powers_of_10[3 * spo10]);
+    error += 2;
+    error <<= wuffs_base__private_implementation__medium_prec_bin__normalize(m);
+
+    // Multiply by big_powers_of_10[etc].
+    wuffs_base__private_implementation__medium_prec_bin__mul_pow_10(
+        m, &wuffs_base__private_implementation__big_powers_of_10[3 * bpo10]);
+    error += 2;
+    error <<= wuffs_base__private_implementation__medium_prec_bin__normalize(m);
+
+    // We have a good approximation of h, but we still have to check whether
+    // the error is small enough. Equivalently, whether the number of surplus
+    // mantissa bits (the bits dropped when going from m's 64 mantissa bits to
+    // the smaller number of double-precision mantissa bits) would always round
+    // up or down, even when perturbed by ±error. We start at 11 surplus bits
+    // (m has 64, double-precision has 1+52), but it can be higher for
+    // subnormals. If every one of m's 64 bits is surplus, we simply fail.
+    //
+    // In many cases, the error is small enough and we return successfully.
+    const int32_t f64_bias = -1023;
+    int32_t subnormal_exp2 = f64_bias - 63;
+    uint32_t surplus_bits = 11;
+    if (subnormal_exp2 >= m->exp2) {
+      surplus_bits += 1 + ((uint32_t)(subnormal_exp2 - m->exp2));
+    }
+    if (surplus_bits >= 64) {  // Avoid undefined (too wide) shifts below.
+      goto fail;
+    }
+
+    uint64_t surplus_mask =
+        (((uint64_t)1) << surplus_bits) - 1;  // e.g. 0x07FF.
+    uint64_t surplus = m->mantissa & surplus_mask;
+    uint64_t halfway = ((uint64_t)1) << (surplus_bits - 1);  // e.g. 0x0400.
+
+    // Do the final calculation in *signed* arithmetic.
+    int64_t i_surplus = (int64_t)surplus;
+    int64_t i_halfway = (int64_t)halfway;
+    int64_t i_error = (int64_t)error;
+
+    if ((i_surplus > (i_halfway - i_error)) &&
+        (i_surplus < (i_halfway + i_error))) {
+      goto fail;
+    }
+
+    wuffs_base__result_f64 ret;
+    ret.status.repr = NULL;
+    ret.value = wuffs_base__private_implementation__medium_prec_bin__as_f64(
+        m, h->negative);
+    return ret;
+  } while (0);
+
+fail:
+  do {
+    wuffs_base__result_f64 ret;
+    ret.status.repr = "#base: mpb__parse_number_f64 failed";
+    ret.value = 0;
+    return ret;
+  } while (0);
+}
+
+// --------
+
+// wuffs_base__parse_number_f64_special parses the non-finite float64 values:
+// "inf", "infinity" and "nan", matched case-insensitively (in ASCII). The
+// word may be preceded by a '+' or '-' sign. Runs of '_' are skipped before
+// the sign, after the sign and after the word.
+//
+// If s matches, it returns a NULL status.repr and the infinity or NaN value.
+// Otherwise, it returns fallback_status_repr and a zero value.
+wuffs_base__result_f64  //
+wuffs_base__parse_number_f64_special(wuffs_base__slice_u8 s,
+                                     const char* fallback_status_repr) {
+  do {
+    uint8_t* p = s.ptr;
+    uint8_t* q = s.ptr + s.len;
+
+    // Skip leading underscores.
+    while ((p < q) && (*p == '_')) {
+      p++;
+    }
+    if (p >= q) {
+      goto fallback;
+    }
+
+    // Parse the optional sign, and any underscores after it.
+    bool negative = (*p == '-');
+    if (negative || (*p == '+')) {
+      p++;
+      while ((p < q) && (*p == '_')) {
+        p++;
+      }
+      if (p >= q) {
+        goto fallback;
+      }
+    }
+
+    // Parse the word. ORing an ASCII letter with 0x20 gives its lower case
+    // form, and only a letter's two cases OR to that lower case value.
+    bool nan = false;
+    size_t n = (size_t)(q - p);
+    uint8_t c0 = (uint8_t)(p[0] | 0x20);
+    if (c0 == 'i') {
+      if ((n < 3) || ((p[1] | 0x20) != 'n') || ((p[2] | 0x20) != 'f')) {
+        goto fallback;
+      }
+      p += 3;
+
+      // Unless input ends (or a '_' follows), it must continue as "infinity".
+      if ((p < q) && (*p != '_')) {
+        if (((q - p) < 5) ||              //
+            ((p[0] | 0x20) != 'i') ||     //
+            ((p[1] | 0x20) != 'n') ||     //
+            ((p[2] | 0x20) != 'i') ||     //
+            ((p[3] | 0x20) != 't') ||     //
+            ((p[4] | 0x20) != 'y')) {
+          goto fallback;
+        }
+        p += 5;
+        if ((p < q) && (*p != '_')) {
+          goto fallback;
+        }
+      }
+
+    } else if (c0 == 'n') {
+      if ((n < 3) || ((p[1] | 0x20) != 'a') || ((p[2] | 0x20) != 'n')) {
+        goto fallback;
+      }
+      p += 3;
+      if ((p < q) && (*p != '_')) {
+        goto fallback;
+      }
+      nan = true;
+
+    } else {
+      goto fallback;
+    }
+
+    // Finish: only underscores may remain.
+    while ((p < q) && (*p == '_')) {
+      p++;
+    }
+    if (p != q) {
+      goto fallback;
+    }
+    uint64_t bits = nan ? 0x7FFFFFFFFFFFFFFF : 0x7FF0000000000000;
+    if (negative) {
+      bits |= 0x8000000000000000;
+    }
+    wuffs_base__result_f64 ret;
+    ret.status.repr = NULL;
+    ret.value = wuffs_base__ieee_754_bit_representation__to_f64(bits);
+    return ret;
+  } while (0);
+
+fallback:
+  do {
+    wuffs_base__result_f64 ret;
+    ret.status.repr = fallback_status_repr;
+    ret.value = 0;
+    return ret;
+  } while (0);
+}
+
+// wuffs_base__parse_number_f64 parses s as a float64 (what C calls a double).
+// If s isn't in the HPD (high precision decimal) number format, it defers to
+// wuffs_base__parse_number_f64_special, passing on the HPD parser's status.
+//
+// Magnitudes too small or too large for a float64 produce a zero or an
+// infinity (keeping the sign), with a NULL status.repr.
+wuffs_base__result_f64  //
+wuffs_base__parse_number_f64(wuffs_base__slice_u8 s) {
+  wuffs_base__private_implementation__medium_prec_bin m;
+  wuffs_base__private_implementation__high_prec_dec h;
+
+  do {
+    // powers converts decimal powers of 10 to binary powers of 2. For example,
+    // (10000 >> 13) is 1. It stops before the elements exceed 60, also known
+    // as WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL.
+    static const uint32_t num_powers = 19;
+    static const uint8_t powers[19] = {
+        0,  3,  6,  9,  13, 16, 19, 23, 26, 29,  //
+        33, 36, 39, 43, 46, 49, 53, 56, 59,      //
+    };
+
+    wuffs_base__status status =
+        wuffs_base__private_implementation__high_prec_dec__parse(&h, s);
+    if (status.repr != NULL) {
+      return wuffs_base__parse_number_f64_special(s, status.repr);
+    }
+
+    // Handle zero and obvious extremes. The largest and smallest positive
+    // finite f64 values are approximately 1.8e+308 and 4.9e-324.
+    if ((h.num_digits == 0) || (h.decimal_point < -326)) {
+      goto zero;
+    } else if (h.decimal_point > 310) {
+      goto infinity;
+    }
+
+    // First try the MPB algorithm, which fails if the rounding is ambiguous.
+    wuffs_base__result_f64 quick =
+        wuffs_base__private_implementation__medium_prec_bin__parse_number_f64(
+            &m, &h, false);
+    if (!quick.status.repr) {
+      return quick;
+    }
+
+    // Scale by powers of 2 until we're in the range [½ .. 1], which gives us
+    // our exponent (in base-2). First we shift right, possibly a little too
+    // far, ending with a value certainly below 1 and possibly below ½...
+    const int32_t f64_bias = -1023;
+    int32_t exp2 = 0;
+    while (h.decimal_point > 0) {
+      uint32_t n = (uint32_t)(+h.decimal_point);
+      uint32_t amt =
+          (n >= num_powers)
+              ? WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL
+              : powers[n];
+      wuffs_base__private_implementation__high_prec_dec__small_rshift(&h, amt);
+      if (h.decimal_point <
+          -WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {
+        goto zero;
+      }
+      exp2 += (int32_t)amt;
+    }
+    // ...then we shift left, putting us in [½ .. 1].
+    while (h.decimal_point <= 0) {
+      uint32_t amt;
+      if (h.decimal_point != 0) {
+        uint32_t n = (uint32_t)(-h.decimal_point);
+        amt = (n >= num_powers)
+                  ? WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL
+                  : powers[n];
+      } else if (h.digits[0] >= 5) {
+        break;
+      } else {
+        amt = (h.digits[0] > 2) ? 1 : 2;
+      }
+      wuffs_base__private_implementation__high_prec_dec__small_lshift(&h, amt);
+      if (h.decimal_point >
+          +WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {
+        goto infinity;
+      }
+      exp2 -= (int32_t)amt;
+    }
+
+    // We're in the range [½ .. 1] but f64 uses [1 .. 2].
+    exp2--;
+
+    // The minimum normal exponent is (f64_bias + 1).
+    while (exp2 < (f64_bias + 1)) {
+      uint32_t n = (uint32_t)((f64_bias + 1) - exp2);
+      if (n > WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL) {
+        n = WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL;
+      }
+      wuffs_base__private_implementation__high_prec_dec__small_rshift(&h, n);
+      exp2 += (int32_t)n;
+    }
+
+    // Check for overflow.
+    if ((exp2 - f64_bias) >= 0x07FF) {  // (1 << 11) - 1.
+      goto infinity;
+    }
+
+    // Extract 53 bits for the mantissa (in base-2).
+    wuffs_base__private_implementation__high_prec_dec__small_lshift(&h, 53);
+    uint64_t man2 =
+        wuffs_base__private_implementation__high_prec_dec__rounded_integer(&h);
+
+    // Rounding might have added one bit. If so, shift and re-check overflow.
+    if ((man2 >> 53) != 0) {
+      man2 >>= 1;
+      exp2++;
+      if ((exp2 - f64_bias) >= 0x07FF) {  // (1 << 11) - 1.
+        goto infinity;
+      }
+    }
+
+    // Handle subnormal numbers.
+    if ((man2 >> 52) == 0) {
+      exp2 = f64_bias;
+    }
+
+    // Pack the bits and return.
+    uint64_t exp2_bits =
+        (uint64_t)((exp2 - f64_bias) & 0x07FF);             // (1 << 11) - 1.
+    uint64_t bits = (man2 & 0x000FFFFFFFFFFFFF) |           // (1 << 52) - 1.
+                    (exp2_bits << 52) |                     //
+                    (h.negative ? 0x8000000000000000 : 0);  // (1 << 63).
+
+    wuffs_base__result_f64 ret;
+    ret.status.repr = NULL;
+    ret.value = wuffs_base__ieee_754_bit_representation__to_f64(bits);
+    return ret;
+  } while (0);
+
+zero:
+  do {
+    wuffs_base__result_f64 ret;
+    ret.status.repr = NULL;
+    ret.value = wuffs_base__ieee_754_bit_representation__to_f64(
+        h.negative ? 0x8000000000000000 : 0);
+    return ret;
+  } while (0);
+
+infinity:
+  do {
+    wuffs_base__result_f64 ret;
+    ret.status.repr = NULL;
+    ret.value = wuffs_base__ieee_754_bit_representation__to_f64(
+        h.negative ? 0xFFF0000000000000 : 0x7FF0000000000000);
+    return ret;
+  } while (0);
+}
diff --git a/internal/cgen/base/image-impl.c b/internal/cgen/base/image-impl.c
index 413eec5..6f4baa1 100644
--- a/internal/cgen/base/image-impl.c
+++ b/internal/cgen/base/image-impl.c
@@ -23,51 +23,6 @@
 
 // --------
 
-static uint64_t  //
-wuffs_base__pixel_swizzler__squash_bgr_565_888(wuffs_base__slice_u8 dst,
-                                               wuffs_base__slice_u8 src) {
-  size_t len4 = (dst.len < src.len ? dst.len : src.len) / 4;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-
-  size_t n = len4;
-  while (n--) {
-    uint32_t argb = wuffs_base__load_u32le__no_bounds_check(s);
-    uint32_t b5 = 0x1F & (argb >> (8 - 5));
-    uint32_t g6 = 0x3F & (argb >> (16 - 6));
-    uint32_t r5 = 0x1F & (argb >> (24 - 5));
-    uint32_t alpha = argb & 0xFF000000;
-    wuffs_base__store_u32le__no_bounds_check(
-        d, alpha | (r5 << 11) | (g6 << 5) | (b5 << 0));
-    s += 4;
-    d += 4;
-  }
-  return len4 * 4;
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__swap_rgbx_bgrx(wuffs_base__slice_u8 dst,
-                                           wuffs_base__slice_u8 src) {
-  size_t len4 = (dst.len < src.len ? dst.len : src.len) / 4;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-
-  size_t n = len4;
-  while (n--) {
-    uint8_t b0 = s[0];
-    uint8_t b1 = s[1];
-    uint8_t b2 = s[2];
-    uint8_t b3 = s[3];
-    d[0] = b2;
-    d[1] = b1;
-    d[2] = b0;
-    d[3] = b3;
-    s += 4;
-    d += 4;
-  }
-  return len4 * 4;
-}
-
 static inline uint32_t  //
 wuffs_base__swap_u32_argb_abgr(uint32_t u) {
   uint32_t o = u & 0xFF00FF00;
@@ -78,152 +33,6 @@
 
 // --------
 
-static inline uint32_t  //
-wuffs_base__composite_nonpremul_nonpremul_u32_axxx(uint32_t dst_nonpremul,
-                                                   uint32_t src_nonpremul) {
-  // Convert from 8-bit color to 16-bit color.
-  uint32_t sa = 0x101 * (0xFF & (src_nonpremul >> 24));
-  uint32_t sr = 0x101 * (0xFF & (src_nonpremul >> 16));
-  uint32_t sg = 0x101 * (0xFF & (src_nonpremul >> 8));
-  uint32_t sb = 0x101 * (0xFF & (src_nonpremul >> 0));
-  uint32_t da = 0x101 * (0xFF & (dst_nonpremul >> 24));
-  uint32_t dr = 0x101 * (0xFF & (dst_nonpremul >> 16));
-  uint32_t dg = 0x101 * (0xFF & (dst_nonpremul >> 8));
-  uint32_t db = 0x101 * (0xFF & (dst_nonpremul >> 0));
-
-  // Convert dst from nonpremul to premul.
-  dr = (dr * da) / 0xFFFF;
-  dg = (dg * da) / 0xFFFF;
-  db = (db * da) / 0xFFFF;
-
-  // Calculate the inverse of the src-alpha: how much of the dst to keep.
-  uint32_t ia = 0xFFFF - sa;
-
-  // Composite src (nonpremul) over dst (premul).
-  da = sa + ((da * ia) / 0xFFFF);
-  dr = ((sr * sa) + (dr * ia)) / 0xFFFF;
-  dg = ((sg * sa) + (dg * ia)) / 0xFFFF;
-  db = ((sb * sa) + (db * ia)) / 0xFFFF;
-
-  // Convert dst from premul to nonpremul.
-  if (da != 0) {
-    dr = (dr * 0xFFFF) / da;
-    dg = (dg * 0xFFFF) / da;
-    db = (db * 0xFFFF) / da;
-  }
-
-  // Convert from 16-bit color to 8-bit color and combine the components.
-  da >>= 8;
-  dr >>= 8;
-  dg >>= 8;
-  db >>= 8;
-  return (db << 0) | (dg << 8) | (dr << 16) | (da << 24);
-}
-
-static inline uint32_t  //
-wuffs_base__composite_nonpremul_premul_u32_axxx(uint32_t dst_nonpremul,
-                                                uint32_t src_premul) {
-  // Convert from 8-bit color to 16-bit color.
-  uint32_t sa = 0x101 * (0xFF & (src_premul >> 24));
-  uint32_t sr = 0x101 * (0xFF & (src_premul >> 16));
-  uint32_t sg = 0x101 * (0xFF & (src_premul >> 8));
-  uint32_t sb = 0x101 * (0xFF & (src_premul >> 0));
-  uint32_t da = 0x101 * (0xFF & (dst_nonpremul >> 24));
-  uint32_t dr = 0x101 * (0xFF & (dst_nonpremul >> 16));
-  uint32_t dg = 0x101 * (0xFF & (dst_nonpremul >> 8));
-  uint32_t db = 0x101 * (0xFF & (dst_nonpremul >> 0));
-
-  // Convert dst from nonpremul to premul.
-  dr = (dr * da) / 0xFFFF;
-  dg = (dg * da) / 0xFFFF;
-  db = (db * da) / 0xFFFF;
-
-  // Calculate the inverse of the src-alpha: how much of the dst to keep.
-  uint32_t ia = 0xFFFF - sa;
-
-  // Composite src (premul) over dst (premul).
-  da = sa + ((da * ia) / 0xFFFF);
-  dr = sr + ((dr * ia) / 0xFFFF);
-  dg = sg + ((dg * ia) / 0xFFFF);
-  db = sb + ((db * ia) / 0xFFFF);
-
-  // Convert dst from premul to nonpremul.
-  if (da != 0) {
-    dr = (dr * 0xFFFF) / da;
-    dg = (dg * 0xFFFF) / da;
-    db = (db * 0xFFFF) / da;
-  }
-
-  // Convert from 16-bit color to 8-bit color and combine the components.
-  da >>= 8;
-  dr >>= 8;
-  dg >>= 8;
-  db >>= 8;
-  return (db << 0) | (dg << 8) | (dr << 16) | (da << 24);
-}
-
-static inline uint32_t  //
-wuffs_base__composite_premul_nonpremul_u32_axxx(uint32_t dst_premul,
-                                                uint32_t src_nonpremul) {
-  // Convert from 8-bit color to 16-bit color.
-  uint32_t sa = 0x101 * (0xFF & (src_nonpremul >> 24));
-  uint32_t sr = 0x101 * (0xFF & (src_nonpremul >> 16));
-  uint32_t sg = 0x101 * (0xFF & (src_nonpremul >> 8));
-  uint32_t sb = 0x101 * (0xFF & (src_nonpremul >> 0));
-  uint32_t da = 0x101 * (0xFF & (dst_premul >> 24));
-  uint32_t dr = 0x101 * (0xFF & (dst_premul >> 16));
-  uint32_t dg = 0x101 * (0xFF & (dst_premul >> 8));
-  uint32_t db = 0x101 * (0xFF & (dst_premul >> 0));
-
-  // Calculate the inverse of the src-alpha: how much of the dst to keep.
-  uint32_t ia = 0xFFFF - sa;
-
-  // Composite src (nonpremul) over dst (premul).
-  da = sa + ((da * ia) / 0xFFFF);
-  dr = ((sr * sa) + (dr * ia)) / 0xFFFF;
-  dg = ((sg * sa) + (dg * ia)) / 0xFFFF;
-  db = ((sb * sa) + (db * ia)) / 0xFFFF;
-
-  // Convert from 16-bit color to 8-bit color and combine the components.
-  da >>= 8;
-  dr >>= 8;
-  dg >>= 8;
-  db >>= 8;
-  return (db << 0) | (dg << 8) | (dr << 16) | (da << 24);
-}
-
-static inline uint32_t  //
-wuffs_base__composite_premul_premul_u32_axxx(uint32_t dst_premul,
-                                             uint32_t src_premul) {
-  // Convert from 8-bit color to 16-bit color.
-  uint32_t sa = 0x101 * (0xFF & (src_premul >> 24));
-  uint32_t sr = 0x101 * (0xFF & (src_premul >> 16));
-  uint32_t sg = 0x101 * (0xFF & (src_premul >> 8));
-  uint32_t sb = 0x101 * (0xFF & (src_premul >> 0));
-  uint32_t da = 0x101 * (0xFF & (dst_premul >> 24));
-  uint32_t dr = 0x101 * (0xFF & (dst_premul >> 16));
-  uint32_t dg = 0x101 * (0xFF & (dst_premul >> 8));
-  uint32_t db = 0x101 * (0xFF & (dst_premul >> 0));
-
-  // Calculate the inverse of the src-alpha: how much of the dst to keep.
-  uint32_t ia = 0xFFFF - sa;
-
-  // Composite src (premul) over dst (premul).
-  da = sa + ((da * ia) / 0xFFFF);
-  dr = sr + ((dr * ia) / 0xFFFF);
-  dg = sg + ((dg * ia) / 0xFFFF);
-  db = sb + ((db * ia) / 0xFFFF);
-
-  // Convert from 16-bit color to 8-bit color and combine the components.
-  da >>= 8;
-  dr >>= 8;
-  dg >>= 8;
-  db >>= 8;
-  return (db << 0) | (dg << 8) | (dr << 16) | (da << 24);
-}
-
-// --------
-
 wuffs_base__color_u32_argb_premul  //
 wuffs_base__pixel_buffer__color_u32_at(const wuffs_base__pixel_buffer* pb,
                                        uint32_t x,
@@ -447,985 +256,3 @@
 
   return (uint8_t)best_index;
 }
-
-// --------
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__copy_1_1(wuffs_base__slice_u8 dst,
-                                     wuffs_base__slice_u8 dst_palette,
-                                     wuffs_base__slice_u8 src) {
-  return wuffs_base__slice_u8__copy_from_slice(dst, src);
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__copy_3_3(wuffs_base__slice_u8 dst,
-                                     wuffs_base__slice_u8 dst_palette,
-                                     wuffs_base__slice_u8 src) {
-  size_t dst_len3 = dst.len / 3;
-  size_t src_len3 = src.len / 3;
-  size_t len = dst_len3 < src_len3 ? dst_len3 : src_len3;
-  if (len > 0) {
-    memmove(dst.ptr, src.ptr, len * 3);
-  }
-  return len;
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__copy_4_4(wuffs_base__slice_u8 dst,
-                                     wuffs_base__slice_u8 dst_palette,
-                                     wuffs_base__slice_u8 src) {
-  size_t dst_len4 = dst.len / 4;
-  size_t src_len4 = src.len / 4;
-  size_t len = dst_len4 < src_len4 ? dst_len4 : src_len4;
-  if (len > 0) {
-    memmove(dst.ptr, src.ptr, len * 4);
-  }
-  return len;
-}
-
-// --------
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__bgr_565__bgr(wuffs_base__slice_u8 dst,
-                                         wuffs_base__slice_u8 dst_palette,
-                                         wuffs_base__slice_u8 src) {
-  size_t dst_len2 = dst.len / 2;
-  size_t src_len3 = src.len / 3;
-  size_t len = dst_len2 < src_len3 ? dst_len2 : src_len3;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  // TODO: unroll.
-
-  while (n >= 1) {
-    uint32_t b5 = s[0] >> 3;
-    uint32_t g6 = s[1] >> 2;
-    uint32_t r5 = s[2] >> 3;
-    uint32_t rgb_565 = (r5 << 11) | (g6 << 5) | (b5 << 0);
-    wuffs_base__store_u16le__no_bounds_check(d + (0 * 2), (uint16_t)rgb_565);
-
-    s += 1 * 3;
-    d += 1 * 2;
-    n -= 1;
-  }
-
-  return len;
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src(
-    wuffs_base__slice_u8 dst,
-    wuffs_base__slice_u8 dst_palette,
-    wuffs_base__slice_u8 src) {
-  size_t dst_len2 = dst.len / 2;
-  size_t src_len4 = src.len / 4;
-  size_t len = dst_len2 < src_len4 ? dst_len2 : src_len4;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  // TODO: unroll.
-
-  while (n >= 1) {
-    wuffs_base__store_u16le__no_bounds_check(
-        d + (0 * 2),
-        wuffs_base__color_u32_argb_premul__as__color_u16_rgb_565(
-            wuffs_base__color_u32_argb_nonpremul__as__color_u32_argb_premul(
-                wuffs_base__load_u32le__no_bounds_check(s + (0 * 4)))));
-
-    s += 1 * 4;
-    d += 1 * 2;
-    n -= 1;
-  }
-
-  return len;
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src_over(
-    wuffs_base__slice_u8 dst,
-    wuffs_base__slice_u8 dst_palette,
-    wuffs_base__slice_u8 src) {
-  size_t dst_len2 = dst.len / 2;
-  size_t src_len4 = src.len / 4;
-  size_t len = dst_len2 < src_len4 ? dst_len2 : src_len4;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  // TODO: unroll.
-
-  while (n >= 1) {
-    // Convert from 8-bit color to 16-bit color.
-    uint32_t sa = 0x101 * ((uint32_t)s[3]);
-    uint32_t sr = 0x101 * ((uint32_t)s[2]);
-    uint32_t sg = 0x101 * ((uint32_t)s[1]);
-    uint32_t sb = 0x101 * ((uint32_t)s[0]);
-
-    // Convert from 565 color to 16-bit color.
-    uint32_t old_rgb_565 = wuffs_base__load_u16le__no_bounds_check(d + (0 * 2));
-    uint32_t old_r5 = 0x1F & (old_rgb_565 >> 11);
-    uint32_t dr = (0x8421 * old_r5) >> 4;
-    uint32_t old_g6 = 0x3F & (old_rgb_565 >> 5);
-    uint32_t dg = (0x1041 * old_g6) >> 2;
-    uint32_t old_b5 = 0x1F & (old_rgb_565 >> 0);
-    uint32_t db = (0x8421 * old_b5) >> 4;
-
-    // Calculate the inverse of the src-alpha: how much of the dst to keep.
-    uint32_t ia = 0xFFFF - sa;
-
-    // Composite src (nonpremul) over dst (premul).
-    dr = ((sr * sa) + (dr * ia)) / 0xFFFF;
-    dg = ((sg * sa) + (dg * ia)) / 0xFFFF;
-    db = ((sb * sa) + (db * ia)) / 0xFFFF;
-
-    // Convert from 16-bit color to 565 color and combine the components.
-    uint32_t new_r5 = 0x1F & (dr >> 11);
-    uint32_t new_g6 = 0x3F & (dg >> 10);
-    uint32_t new_b5 = 0x1F & (db >> 11);
-    uint32_t new_rgb_565 = (new_r5 << 11) | (new_g6 << 5) | (new_b5 << 0);
-    wuffs_base__store_u16le__no_bounds_check(d + (0 * 2),
-                                             (uint16_t)new_rgb_565);
-
-    s += 1 * 4;
-    d += 1 * 2;
-    n -= 1;
-  }
-
-  return len;
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__bgr_565__y(wuffs_base__slice_u8 dst,
-                                       wuffs_base__slice_u8 dst_palette,
-                                       wuffs_base__slice_u8 src) {
-  size_t dst_len2 = dst.len / 2;
-  size_t len = dst_len2 < src.len ? dst_len2 : src.len;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  // TODO: unroll.
-
-  while (n >= 1) {
-    uint32_t y5 = s[0] >> 3;
-    uint32_t y6 = s[0] >> 2;
-    uint32_t rgb_565 = (y5 << 11) | (y6 << 5) | (y5 << 0);
-    wuffs_base__store_u16le__no_bounds_check(d + (0 * 2), (uint16_t)rgb_565);
-
-    s += 1 * 1;
-    d += 1 * 2;
-    n -= 1;
-  }
-
-  return len;
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__bgr_565__index__src(
-    wuffs_base__slice_u8 dst,
-    wuffs_base__slice_u8 dst_palette,
-    wuffs_base__slice_u8 src) {
-  if (dst_palette.len != 1024) {
-    return 0;
-  }
-  size_t dst_len2 = dst.len / 2;
-  size_t len = dst_len2 < src.len ? dst_len2 : src.len;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  const size_t loop_unroll_count = 4;
-
-  while (n >= loop_unroll_count) {
-    wuffs_base__store_u16le__no_bounds_check(
-        d + (0 * 2), wuffs_base__load_u16le__no_bounds_check(
-                         dst_palette.ptr + ((size_t)s[0] * 4)));
-    wuffs_base__store_u16le__no_bounds_check(
-        d + (1 * 2), wuffs_base__load_u16le__no_bounds_check(
-                         dst_palette.ptr + ((size_t)s[1] * 4)));
-    wuffs_base__store_u16le__no_bounds_check(
-        d + (2 * 2), wuffs_base__load_u16le__no_bounds_check(
-                         dst_palette.ptr + ((size_t)s[2] * 4)));
-    wuffs_base__store_u16le__no_bounds_check(
-        d + (3 * 2), wuffs_base__load_u16le__no_bounds_check(
-                         dst_palette.ptr + ((size_t)s[3] * 4)));
-
-    s += loop_unroll_count * 1;
-    d += loop_unroll_count * 2;
-    n -= loop_unroll_count;
-  }
-
-  while (n >= 1) {
-    wuffs_base__store_u16le__no_bounds_check(
-        d + (0 * 2), wuffs_base__load_u16le__no_bounds_check(
-                         dst_palette.ptr + ((size_t)s[0] * 4)));
-
-    s += 1 * 1;
-    d += 1 * 2;
-    n -= 1;
-  }
-
-  return len;
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__bgr_565__index_binary_alpha__src_over(
-    wuffs_base__slice_u8 dst,
-    wuffs_base__slice_u8 dst_palette,
-    wuffs_base__slice_u8 src) {
-  if (dst_palette.len != 1024) {
-    return 0;
-  }
-  size_t dst_len2 = dst.len / 2;
-  size_t len = dst_len2 < src.len ? dst_len2 : src.len;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  // TODO: unroll.
-
-  while (n >= 1) {
-    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
-                                                          ((size_t)s[0] * 4));
-    if (s0) {
-      wuffs_base__store_u16le__no_bounds_check(d + (0 * 2), (uint16_t)s0);
-    }
-
-    s += 1 * 1;
-    d += 1 * 2;
-    n -= 1;
-  }
-
-  return len;
-}
-
-// --------
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src(
-    wuffs_base__slice_u8 dst,
-    wuffs_base__slice_u8 dst_palette,
-    wuffs_base__slice_u8 src) {
-  size_t dst_len3 = dst.len / 3;
-  size_t src_len4 = src.len / 4;
-  size_t len = dst_len3 < src_len4 ? dst_len3 : src_len4;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  // TODO: unroll.
-
-  while (n >= 1) {
-    uint32_t s0 =
-        wuffs_base__color_u32_argb_nonpremul__as__color_u32_argb_premul(
-            wuffs_base__load_u32le__no_bounds_check(s + (0 * 4)));
-    wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);
-
-    s += 1 * 4;
-    d += 1 * 3;
-    n -= 1;
-  }
-
-  return len;
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src_over(
-    wuffs_base__slice_u8 dst,
-    wuffs_base__slice_u8 dst_palette,
-    wuffs_base__slice_u8 src) {
-  size_t dst_len3 = dst.len / 3;
-  size_t src_len4 = src.len / 4;
-  size_t len = dst_len3 < src_len4 ? dst_len3 : src_len4;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  // TODO: unroll.
-
-  while (n >= 1) {
-    // Convert from 8-bit color to 16-bit color.
-    uint32_t sa = 0x101 * ((uint32_t)s[3]);
-    uint32_t sr = 0x101 * ((uint32_t)s[2]);
-    uint32_t sg = 0x101 * ((uint32_t)s[1]);
-    uint32_t sb = 0x101 * ((uint32_t)s[0]);
-    uint32_t dr = 0x101 * ((uint32_t)d[2]);
-    uint32_t dg = 0x101 * ((uint32_t)d[1]);
-    uint32_t db = 0x101 * ((uint32_t)d[0]);
-
-    // Calculate the inverse of the src-alpha: how much of the dst to keep.
-    uint32_t ia = 0xFFFF - sa;
-
-    // Composite src (nonpremul) over dst (premul).
-    dr = ((sr * sa) + (dr * ia)) / 0xFFFF;
-    dg = ((sg * sa) + (dg * ia)) / 0xFFFF;
-    db = ((sb * sa) + (db * ia)) / 0xFFFF;
-
-    // Convert from 16-bit color to 8-bit color.
-    d[0] = (uint8_t)(db >> 8);
-    d[1] = (uint8_t)(dg >> 8);
-    d[2] = (uint8_t)(dr >> 8);
-
-    s += 1 * 4;
-    d += 1 * 3;
-    n -= 1;
-  }
-
-  return len;
-}
-
-// --------
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul__src_over(
-    wuffs_base__slice_u8 dst,
-    wuffs_base__slice_u8 dst_palette,
-    wuffs_base__slice_u8 src) {
-  size_t dst_len4 = dst.len / 4;
-  size_t src_len4 = src.len / 4;
-  size_t len = dst_len4 < src_len4 ? dst_len4 : src_len4;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  // TODO: unroll.
-
-  while (n >= 1) {
-    uint32_t d0 = wuffs_base__load_u32le__no_bounds_check(d + (0 * 4));
-    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(s + (0 * 4));
-    wuffs_base__store_u32le__no_bounds_check(
-        d + (0 * 4),
-        wuffs_base__composite_nonpremul_nonpremul_u32_axxx(d0, s0));
-
-    s += 1 * 4;
-    d += 1 * 4;
-    n -= 1;
-  }
-
-  return len;
-}
-
-// --------
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src(
-    wuffs_base__slice_u8 dst,
-    wuffs_base__slice_u8 dst_palette,
-    wuffs_base__slice_u8 src) {
-  size_t dst_len4 = dst.len / 4;
-  size_t src_len4 = src.len / 4;
-  size_t len = dst_len4 < src_len4 ? dst_len4 : src_len4;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  // TODO: unroll.
-
-  while (n >= 1) {
-    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(s + (0 * 4));
-    wuffs_base__store_u32le__no_bounds_check(
-        d + (0 * 4),
-        wuffs_base__color_u32_argb_nonpremul__as__color_u32_argb_premul(s0));
-
-    s += 1 * 4;
-    d += 1 * 4;
-    n -= 1;
-  }
-
-  return len;
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src_over(
-    wuffs_base__slice_u8 dst,
-    wuffs_base__slice_u8 dst_palette,
-    wuffs_base__slice_u8 src) {
-  size_t dst_len4 = dst.len / 4;
-  size_t src_len4 = src.len / 4;
-  size_t len = dst_len4 < src_len4 ? dst_len4 : src_len4;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  // TODO: unroll.
-
-  while (n >= 1) {
-    uint32_t d0 = wuffs_base__load_u32le__no_bounds_check(d + (0 * 4));
-    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(s + (0 * 4));
-    wuffs_base__store_u32le__no_bounds_check(
-        d + (0 * 4), wuffs_base__composite_premul_nonpremul_u32_axxx(d0, s0));
-
-    s += 1 * 4;
-    d += 1 * 4;
-    n -= 1;
-  }
-
-  return len;
-}
-
-// --------
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__xxx__index__src(wuffs_base__slice_u8 dst,
-                                            wuffs_base__slice_u8 dst_palette,
-                                            wuffs_base__slice_u8 src) {
-  if (dst_palette.len != 1024) {
-    return 0;
-  }
-  size_t dst_len3 = dst.len / 3;
-  size_t len = dst_len3 < src.len ? dst_len3 : src.len;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  const size_t loop_unroll_count = 4;
-
-  // The comparison in the while condition is ">", not ">=", because with
-  // ">=", the last 4-byte store could write past the end of the dst slice.
-  //
-  // Each 4-byte store writes one too many bytes, but a subsequent store
-  // will overwrite that with the correct byte. There is always another
-  // store, whether a 4-byte store in this loop or a 1-byte store in the
-  // next loop.
-  while (n > loop_unroll_count) {
-    wuffs_base__store_u32le__no_bounds_check(
-        d + (0 * 3), wuffs_base__load_u32le__no_bounds_check(
-                         dst_palette.ptr + ((size_t)s[0] * 4)));
-    wuffs_base__store_u32le__no_bounds_check(
-        d + (1 * 3), wuffs_base__load_u32le__no_bounds_check(
-                         dst_palette.ptr + ((size_t)s[1] * 4)));
-    wuffs_base__store_u32le__no_bounds_check(
-        d + (2 * 3), wuffs_base__load_u32le__no_bounds_check(
-                         dst_palette.ptr + ((size_t)s[2] * 4)));
-    wuffs_base__store_u32le__no_bounds_check(
-        d + (3 * 3), wuffs_base__load_u32le__no_bounds_check(
-                         dst_palette.ptr + ((size_t)s[3] * 4)));
-
-    s += loop_unroll_count * 1;
-    d += loop_unroll_count * 3;
-    n -= loop_unroll_count;
-  }
-
-  while (n >= 1) {
-    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
-                                                          ((size_t)s[0] * 4));
-    wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);
-
-    s += 1 * 1;
-    d += 1 * 3;
-    n -= 1;
-  }
-
-  return len;
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__xxx__index_binary_alpha__src_over(
-    wuffs_base__slice_u8 dst,
-    wuffs_base__slice_u8 dst_palette,
-    wuffs_base__slice_u8 src) {
-  if (dst_palette.len != 1024) {
-    return 0;
-  }
-  size_t dst_len3 = dst.len / 3;
-  size_t len = dst_len3 < src.len ? dst_len3 : src.len;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  const size_t loop_unroll_count = 4;
-
-  while (n >= loop_unroll_count) {
-    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
-                                                          ((size_t)s[0] * 4));
-    if (s0) {
-      wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);
-    }
-    uint32_t s1 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
-                                                          ((size_t)s[1] * 4));
-    if (s1) {
-      wuffs_base__store_u24le__no_bounds_check(d + (1 * 3), s1);
-    }
-    uint32_t s2 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
-                                                          ((size_t)s[2] * 4));
-    if (s2) {
-      wuffs_base__store_u24le__no_bounds_check(d + (2 * 3), s2);
-    }
-    uint32_t s3 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
-                                                          ((size_t)s[3] * 4));
-    if (s3) {
-      wuffs_base__store_u24le__no_bounds_check(d + (3 * 3), s3);
-    }
-
-    s += loop_unroll_count * 1;
-    d += loop_unroll_count * 3;
-    n -= loop_unroll_count;
-  }
-
-  while (n >= 1) {
-    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
-                                                          ((size_t)s[0] * 4));
-    if (s0) {
-      wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);
-    }
-
-    s += 1 * 1;
-    d += 1 * 3;
-    n -= 1;
-  }
-
-  return len;
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__xxx__y(wuffs_base__slice_u8 dst,
-                                   wuffs_base__slice_u8 dst_palette,
-                                   wuffs_base__slice_u8 src) {
-  size_t dst_len3 = dst.len / 3;
-  size_t len = dst_len3 < src.len ? dst_len3 : src.len;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  // TODO: unroll.
-
-  while (n >= 1) {
-    uint8_t s0 = s[0];
-    d[0] = s0;
-    d[1] = s0;
-    d[2] = s0;
-
-    s += 1 * 1;
-    d += 1 * 3;
-    n -= 1;
-  }
-
-  return len;
-}
-
-// --------
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__xxxx__index__src(wuffs_base__slice_u8 dst,
-                                             wuffs_base__slice_u8 dst_palette,
-                                             wuffs_base__slice_u8 src) {
-  if (dst_palette.len != 1024) {
-    return 0;
-  }
-  size_t dst_len4 = dst.len / 4;
-  size_t len = dst_len4 < src.len ? dst_len4 : src.len;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  const size_t loop_unroll_count = 4;
-
-  while (n >= loop_unroll_count) {
-    wuffs_base__store_u32le__no_bounds_check(
-        d + (0 * 4), wuffs_base__load_u32le__no_bounds_check(
-                         dst_palette.ptr + ((size_t)s[0] * 4)));
-    wuffs_base__store_u32le__no_bounds_check(
-        d + (1 * 4), wuffs_base__load_u32le__no_bounds_check(
-                         dst_palette.ptr + ((size_t)s[1] * 4)));
-    wuffs_base__store_u32le__no_bounds_check(
-        d + (2 * 4), wuffs_base__load_u32le__no_bounds_check(
-                         dst_palette.ptr + ((size_t)s[2] * 4)));
-    wuffs_base__store_u32le__no_bounds_check(
-        d + (3 * 4), wuffs_base__load_u32le__no_bounds_check(
-                         dst_palette.ptr + ((size_t)s[3] * 4)));
-
-    s += loop_unroll_count * 1;
-    d += loop_unroll_count * 4;
-    n -= loop_unroll_count;
-  }
-
-  while (n >= 1) {
-    wuffs_base__store_u32le__no_bounds_check(
-        d + (0 * 4), wuffs_base__load_u32le__no_bounds_check(
-                         dst_palette.ptr + ((size_t)s[0] * 4)));
-
-    s += 1 * 1;
-    d += 1 * 4;
-    n -= 1;
-  }
-
-  return len;
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over(
-    wuffs_base__slice_u8 dst,
-    wuffs_base__slice_u8 dst_palette,
-    wuffs_base__slice_u8 src) {
-  if (dst_palette.len != 1024) {
-    return 0;
-  }
-  size_t dst_len4 = dst.len / 4;
-  size_t len = dst_len4 < src.len ? dst_len4 : src.len;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  const size_t loop_unroll_count = 4;
-
-  while (n >= loop_unroll_count) {
-    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
-                                                          ((size_t)s[0] * 4));
-    if (s0) {
-      wuffs_base__store_u32le__no_bounds_check(d + (0 * 4), s0);
-    }
-    uint32_t s1 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
-                                                          ((size_t)s[1] * 4));
-    if (s1) {
-      wuffs_base__store_u32le__no_bounds_check(d + (1 * 4), s1);
-    }
-    uint32_t s2 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
-                                                          ((size_t)s[2] * 4));
-    if (s2) {
-      wuffs_base__store_u32le__no_bounds_check(d + (2 * 4), s2);
-    }
-    uint32_t s3 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
-                                                          ((size_t)s[3] * 4));
-    if (s3) {
-      wuffs_base__store_u32le__no_bounds_check(d + (3 * 4), s3);
-    }
-
-    s += loop_unroll_count * 1;
-    d += loop_unroll_count * 4;
-    n -= loop_unroll_count;
-  }
-
-  while (n >= 1) {
-    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
-                                                          ((size_t)s[0] * 4));
-    if (s0) {
-      wuffs_base__store_u32le__no_bounds_check(d + (0 * 4), s0);
-    }
-
-    s += 1 * 1;
-    d += 1 * 4;
-    n -= 1;
-  }
-
-  return len;
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__xxxx__xxx(wuffs_base__slice_u8 dst,
-                                      wuffs_base__slice_u8 dst_palette,
-                                      wuffs_base__slice_u8 src) {
-  size_t dst_len4 = dst.len / 4;
-  size_t src_len3 = src.len / 3;
-  size_t len = dst_len4 < src_len3 ? dst_len4 : src_len3;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  // TODO: unroll.
-
-  while (n >= 1) {
-    wuffs_base__store_u32le__no_bounds_check(
-        d + (0 * 4),
-        0xFF000000 | wuffs_base__load_u24le__no_bounds_check(s + (0 * 3)));
-
-    s += 1 * 3;
-    d += 1 * 4;
-    n -= 1;
-  }
-
-  return len;
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__xxxx__y(wuffs_base__slice_u8 dst,
-                                    wuffs_base__slice_u8 dst_palette,
-                                    wuffs_base__slice_u8 src) {
-  size_t dst_len4 = dst.len / 4;
-  size_t len = dst_len4 < src.len ? dst_len4 : src.len;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  // TODO: unroll.
-
-  while (n >= 1) {
-    wuffs_base__store_u32le__no_bounds_check(
-        d + (0 * 4), 0xFF000000 | (0x010101 * (uint32_t)s[0]));
-
-    s += 1 * 1;
-    d += 1 * 4;
-    n -= 1;
-  }
-
-  return len;
-}
-
-// --------
-
-static wuffs_base__pixel_swizzler__func  //
-wuffs_base__pixel_swizzler__prepare__y(wuffs_base__pixel_swizzler* p,
-                                       wuffs_base__pixel_format dst_format,
-                                       wuffs_base__slice_u8 dst_palette,
-                                       wuffs_base__slice_u8 src_palette,
-                                       wuffs_base__pixel_blend blend) {
-  switch (dst_format.repr) {
-    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
-      return wuffs_base__pixel_swizzler__bgr_565__y;
-
-    case WUFFS_BASE__PIXEL_FORMAT__BGR:
-    case WUFFS_BASE__PIXEL_FORMAT__RGB:
-      return wuffs_base__pixel_swizzler__xxx__y;
-
-    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:
-    case WUFFS_BASE__PIXEL_FORMAT__BGRX:
-    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:
-    case WUFFS_BASE__PIXEL_FORMAT__RGBX:
-      return wuffs_base__pixel_swizzler__xxxx__y;
-  }
-  return NULL;
-}
-
-static wuffs_base__pixel_swizzler__func  //
-wuffs_base__pixel_swizzler__prepare__indexed__bgra_binary(
-    wuffs_base__pixel_swizzler* p,
-    wuffs_base__pixel_format dst_format,
-    wuffs_base__slice_u8 dst_palette,
-    wuffs_base__slice_u8 src_palette,
-    wuffs_base__pixel_blend blend) {
-  switch (dst_format.repr) {
-    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_NONPREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_PREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY:
-      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=
-          1024) {
-        return NULL;
-      }
-      switch (blend) {
-        case WUFFS_BASE__PIXEL_BLEND__SRC:
-          return wuffs_base__pixel_swizzler__copy_1_1;
-      }
-      return NULL;
-
-    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
-      if (wuffs_base__pixel_swizzler__squash_bgr_565_888(dst_palette,
-                                                         src_palette) != 1024) {
-        return NULL;
-      }
-      switch (blend) {
-        case WUFFS_BASE__PIXEL_BLEND__SRC:
-          return wuffs_base__pixel_swizzler__bgr_565__index__src;
-        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
-          return wuffs_base__pixel_swizzler__bgr_565__index_binary_alpha__src_over;
-      }
-      return NULL;
-
-    case WUFFS_BASE__PIXEL_FORMAT__BGR:
-      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=
-          1024) {
-        return NULL;
-      }
-      switch (blend) {
-        case WUFFS_BASE__PIXEL_BLEND__SRC:
-          return wuffs_base__pixel_swizzler__xxx__index__src;
-        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
-          return wuffs_base__pixel_swizzler__xxx__index_binary_alpha__src_over;
-      }
-      return NULL;
-
-    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:
-      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=
-          1024) {
-        return NULL;
-      }
-      switch (blend) {
-        case WUFFS_BASE__PIXEL_BLEND__SRC:
-          return wuffs_base__pixel_swizzler__xxxx__index__src;
-        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
-          return wuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over;
-      }
-      return NULL;
-
-    case WUFFS_BASE__PIXEL_FORMAT__RGB:
-      if (wuffs_base__pixel_swizzler__swap_rgbx_bgrx(dst_palette,
-                                                     src_palette) != 1024) {
-        return NULL;
-      }
-      switch (blend) {
-        case WUFFS_BASE__PIXEL_BLEND__SRC:
-          return wuffs_base__pixel_swizzler__xxx__index__src;
-        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
-          return wuffs_base__pixel_swizzler__xxx__index_binary_alpha__src_over;
-      }
-      return NULL;
-
-    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:
-      if (wuffs_base__pixel_swizzler__swap_rgbx_bgrx(dst_palette,
-                                                     src_palette) != 1024) {
-        return NULL;
-      }
-      switch (blend) {
-        case WUFFS_BASE__PIXEL_BLEND__SRC:
-          return wuffs_base__pixel_swizzler__xxxx__index__src;
-        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
-          return wuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over;
-      }
-      return NULL;
-  }
-  return NULL;
-}
-
-static wuffs_base__pixel_swizzler__func  //
-wuffs_base__pixel_swizzler__prepare__bgr(wuffs_base__pixel_swizzler* p,
-                                         wuffs_base__pixel_format dst_format,
-                                         wuffs_base__slice_u8 dst_palette,
-                                         wuffs_base__slice_u8 src_palette,
-                                         wuffs_base__pixel_blend blend) {
-  switch (dst_format.repr) {
-    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
-      return wuffs_base__pixel_swizzler__bgr_565__bgr;
-
-    case WUFFS_BASE__PIXEL_FORMAT__BGR:
-      return wuffs_base__pixel_swizzler__copy_3_3;
-
-    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:
-    case WUFFS_BASE__PIXEL_FORMAT__BGRX:
-      return wuffs_base__pixel_swizzler__xxxx__xxx;
-
-    case WUFFS_BASE__PIXEL_FORMAT__RGB:
-    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:
-    case WUFFS_BASE__PIXEL_FORMAT__RGBX:
-      // TODO.
-      break;
-  }
-  return NULL;
-}
-
-static wuffs_base__pixel_swizzler__func  //
-wuffs_base__pixel_swizzler__prepare__bgra_nonpremul(
-    wuffs_base__pixel_swizzler* p,
-    wuffs_base__pixel_format dst_format,
-    wuffs_base__slice_u8 dst_palette,
-    wuffs_base__slice_u8 src_palette,
-    wuffs_base__pixel_blend blend) {
-  switch (dst_format.repr) {
-    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
-      switch (blend) {
-        case WUFFS_BASE__PIXEL_BLEND__SRC:
-          return wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src;
-        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
-          return wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src_over;
-      }
-      return NULL;
-
-    case WUFFS_BASE__PIXEL_FORMAT__BGR:
-      switch (blend) {
-        case WUFFS_BASE__PIXEL_BLEND__SRC:
-          return wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src;
-        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
-          return wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src_over;
-      }
-      return NULL;
-
-    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
-      switch (blend) {
-        case WUFFS_BASE__PIXEL_BLEND__SRC:
-          return wuffs_base__pixel_swizzler__copy_4_4;
-        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
-          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul__src_over;
-      }
-      return NULL;
-
-    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
-      switch (blend) {
-        case WUFFS_BASE__PIXEL_BLEND__SRC:
-          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src;
-        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
-          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src_over;
-      }
-      return NULL;
-
-    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:
-    case WUFFS_BASE__PIXEL_FORMAT__BGRX:
-      // TODO.
-      break;
-
-    case WUFFS_BASE__PIXEL_FORMAT__RGB:
-    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:
-    case WUFFS_BASE__PIXEL_FORMAT__RGBX:
-      // TODO.
-      break;
-  }
-  return NULL;
-}
-
-// --------
-
-wuffs_base__status  //
-wuffs_base__pixel_swizzler__prepare(wuffs_base__pixel_swizzler* p,
-                                    wuffs_base__pixel_format dst_format,
-                                    wuffs_base__slice_u8 dst_palette,
-                                    wuffs_base__pixel_format src_format,
-                                    wuffs_base__slice_u8 src_palette,
-                                    wuffs_base__pixel_blend blend) {
-  if (!p) {
-    return wuffs_base__make_status(wuffs_base__error__bad_receiver);
-  }
-
-  // TODO: support many more formats.
-
-  wuffs_base__pixel_swizzler__func func = NULL;
-
-  switch (src_format.repr) {
-    case WUFFS_BASE__PIXEL_FORMAT__Y:
-      func = wuffs_base__pixel_swizzler__prepare__y(p, dst_format, dst_palette,
-                                                    src_palette, blend);
-      break;
-
-    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY:
-      func = wuffs_base__pixel_swizzler__prepare__indexed__bgra_binary(
-          p, dst_format, dst_palette, src_palette, blend);
-      break;
-
-    case WUFFS_BASE__PIXEL_FORMAT__BGR:
-      func = wuffs_base__pixel_swizzler__prepare__bgr(
-          p, dst_format, dst_palette, src_palette, blend);
-      break;
-
-    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
-      func = wuffs_base__pixel_swizzler__prepare__bgra_nonpremul(
-          p, dst_format, dst_palette, src_palette, blend);
-      break;
-  }
-
-  p->private_impl.func = func;
-  return wuffs_base__make_status(
-      func ? NULL : wuffs_base__error__unsupported_pixel_swizzler_option);
-}
-
-uint64_t  //
-wuffs_base__pixel_swizzler__swizzle_interleaved(
-    const wuffs_base__pixel_swizzler* p,
-    wuffs_base__slice_u8 dst,
-    wuffs_base__slice_u8 dst_palette,
-    wuffs_base__slice_u8 src) {
-  if (p && p->private_impl.func) {
-    return (*p->private_impl.func)(dst, dst_palette, src);
-  }
-  return 0;
-}
diff --git a/internal/cgen/base/pixconv-submodule.c b/internal/cgen/base/pixconv-submodule.c
new file mode 100644
index 0000000..1e065bd
--- /dev/null
+++ b/internal/cgen/base/pixconv-submodule.c
@@ -0,0 +1,1190 @@
+// After editing this file, run "go generate" in the parent directory.
+
+// Copyright 2017 The Wuffs Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// ---------------- Pixel Swizzler
+
+static inline uint32_t  //
+wuffs_base__composite_nonpremul_nonpremul_u32_axxx(uint32_t dst_nonpremul,
+                                                   uint32_t src_nonpremul) {
+  // Convert from 8-bit color to 16-bit color.
+  uint32_t sa = 0x101 * (0xFF & (src_nonpremul >> 24));
+  uint32_t sr = 0x101 * (0xFF & (src_nonpremul >> 16));
+  uint32_t sg = 0x101 * (0xFF & (src_nonpremul >> 8));
+  uint32_t sb = 0x101 * (0xFF & (src_nonpremul >> 0));
+  uint32_t da = 0x101 * (0xFF & (dst_nonpremul >> 24));
+  uint32_t dr = 0x101 * (0xFF & (dst_nonpremul >> 16));
+  uint32_t dg = 0x101 * (0xFF & (dst_nonpremul >> 8));
+  uint32_t db = 0x101 * (0xFF & (dst_nonpremul >> 0));
+
+  // Convert dst from nonpremul to premul.
+  dr = (dr * da) / 0xFFFF;
+  dg = (dg * da) / 0xFFFF;
+  db = (db * da) / 0xFFFF;
+
+  // Calculate the inverse of the src-alpha: how much of the dst to keep.
+  uint32_t ia = 0xFFFF - sa;
+
+  // Composite src (nonpremul) over dst (premul).
+  da = sa + ((da * ia) / 0xFFFF);
+  dr = ((sr * sa) + (dr * ia)) / 0xFFFF;
+  dg = ((sg * sa) + (dg * ia)) / 0xFFFF;
+  db = ((sb * sa) + (db * ia)) / 0xFFFF;
+
+  // Convert dst from premul to nonpremul.
+  if (da != 0) {
+    dr = (dr * 0xFFFF) / da;
+    dg = (dg * 0xFFFF) / da;
+    db = (db * 0xFFFF) / da;
+  }
+
+  // Convert from 16-bit color to 8-bit color and combine the components.
+  da >>= 8;
+  dr >>= 8;
+  dg >>= 8;
+  db >>= 8;
+  return (db << 0) | (dg << 8) | (dr << 16) | (da << 24);
+}
+
+static inline uint32_t  //
+wuffs_base__composite_nonpremul_premul_u32_axxx(uint32_t dst_nonpremul,
+                                                uint32_t src_premul) {
+  // Convert from 8-bit color to 16-bit color.
+  uint32_t sa = 0x101 * (0xFF & (src_premul >> 24));
+  uint32_t sr = 0x101 * (0xFF & (src_premul >> 16));
+  uint32_t sg = 0x101 * (0xFF & (src_premul >> 8));
+  uint32_t sb = 0x101 * (0xFF & (src_premul >> 0));
+  uint32_t da = 0x101 * (0xFF & (dst_nonpremul >> 24));
+  uint32_t dr = 0x101 * (0xFF & (dst_nonpremul >> 16));
+  uint32_t dg = 0x101 * (0xFF & (dst_nonpremul >> 8));
+  uint32_t db = 0x101 * (0xFF & (dst_nonpremul >> 0));
+
+  // Convert dst from nonpremul to premul.
+  dr = (dr * da) / 0xFFFF;
+  dg = (dg * da) / 0xFFFF;
+  db = (db * da) / 0xFFFF;
+
+  // Calculate the inverse of the src-alpha: how much of the dst to keep.
+  uint32_t ia = 0xFFFF - sa;
+
+  // Composite src (premul) over dst (premul).
+  da = sa + ((da * ia) / 0xFFFF);
+  dr = sr + ((dr * ia) / 0xFFFF);
+  dg = sg + ((dg * ia) / 0xFFFF);
+  db = sb + ((db * ia) / 0xFFFF);
+
+  // Convert dst from premul to nonpremul.
+  if (da != 0) {
+    dr = (dr * 0xFFFF) / da;
+    dg = (dg * 0xFFFF) / da;
+    db = (db * 0xFFFF) / da;
+  }
+
+  // Convert from 16-bit color to 8-bit color and combine the components.
+  da >>= 8;
+  dr >>= 8;
+  dg >>= 8;
+  db >>= 8;
+  return (db << 0) | (dg << 8) | (dr << 16) | (da << 24);
+}
+
+static inline uint32_t  //
+wuffs_base__composite_premul_nonpremul_u32_axxx(uint32_t dst_premul,
+                                                uint32_t src_nonpremul) {
+  // Convert from 8-bit color to 16-bit color.
+  uint32_t sa = 0x101 * (0xFF & (src_nonpremul >> 24));
+  uint32_t sr = 0x101 * (0xFF & (src_nonpremul >> 16));
+  uint32_t sg = 0x101 * (0xFF & (src_nonpremul >> 8));
+  uint32_t sb = 0x101 * (0xFF & (src_nonpremul >> 0));
+  uint32_t da = 0x101 * (0xFF & (dst_premul >> 24));
+  uint32_t dr = 0x101 * (0xFF & (dst_premul >> 16));
+  uint32_t dg = 0x101 * (0xFF & (dst_premul >> 8));
+  uint32_t db = 0x101 * (0xFF & (dst_premul >> 0));
+
+  // Calculate the inverse of the src-alpha: how much of the dst to keep.
+  uint32_t ia = 0xFFFF - sa;
+
+  // Composite src (nonpremul) over dst (premul).
+  da = sa + ((da * ia) / 0xFFFF);
+  dr = ((sr * sa) + (dr * ia)) / 0xFFFF;
+  dg = ((sg * sa) + (dg * ia)) / 0xFFFF;
+  db = ((sb * sa) + (db * ia)) / 0xFFFF;
+
+  // Convert from 16-bit color to 8-bit color and combine the components.
+  da >>= 8;
+  dr >>= 8;
+  dg >>= 8;
+  db >>= 8;
+  return (db << 0) | (dg << 8) | (dr << 16) | (da << 24);
+}
+
+static inline uint32_t  //
+wuffs_base__composite_premul_premul_u32_axxx(uint32_t dst_premul,
+                                             uint32_t src_premul) {
+  // Blends src over dst, where both are 8-bit-per-channel premultiplied
+  // colors packed with alpha in the top byte. The result is packed likewise.
+  //
+  // For premul-over-premul, every channel (alpha included) uses one formula:
+  //   out = src + ((dst * (0xFFFF - src_alpha)) / 0xFFFF)
+  // evaluated in 16-bit precision, so a single loop over the four byte lanes
+  // suffices.
+
+  // Multiplying an 8-bit value by 0x101 widens it to 16 bits (0xFF -> 0xFFFF).
+  uint32_t src_alpha16 = 0x101 * (0xFF & (src_premul >> 24));
+
+  // The fraction of dst that remains visible beneath src.
+  uint32_t keep16 = 0xFFFF - src_alpha16;
+
+  uint32_t result = 0;
+  uint32_t shift;
+  for (shift = 0; shift < 32; shift += 8) {
+    uint32_t s16 = 0x101 * (0xFF & (src_premul >> shift));
+    uint32_t d16 = 0x101 * (0xFF & (dst_premul >> shift));
+    uint32_t blended16 = s16 + ((d16 * keep16) / 0xFFFF);
+
+    // Narrow back to 8 bits and drop the lane into place.
+    result |= (blended16 >> 8) << shift;
+  }
+  return result;
+}
+
+// --------
+
+static uint64_t  //
+wuffs_base__pixel_swizzler__squash_bgr_565_888(wuffs_base__slice_u8 dst,
+                                               wuffs_base__slice_u8 src) {
+  // Rewrites each 4-byte little-endian 0xAARRGGBB entry of src into dst as
+  // (alpha byte kept in bits 24..31) | (565 color in bits 0..15).
+  // Returns the number of bytes (not entries) processed.
+  size_t shorter = (src.len > dst.len) ? dst.len : src.len;
+  size_t num_entries = shorter / 4;
+
+  size_t i;
+  for (i = 0; i < num_entries; i++) {
+    uint32_t c = wuffs_base__load_u32le__no_bounds_check(src.ptr + (4 * i));
+    uint32_t packed = c & 0xFF000000;    // Alpha is carried over untouched.
+    packed |= (0x1F & (c >> 19)) << 11;  // Top 5 bits of red.
+    packed |= (0x3F & (c >> 10)) << 5;   // Top 6 bits of green.
+    packed |= (0x1F & (c >> 3)) << 0;    // Top 5 bits of blue.
+    wuffs_base__store_u32le__no_bounds_check(dst.ptr + (4 * i), packed);
+  }
+
+  return num_entries * 4;
+}
+
+static uint64_t  //
+wuffs_base__pixel_swizzler__swap_rgbx_bgrx(wuffs_base__slice_u8 dst,
+                                           wuffs_base__slice_u8 src) {
+  // Copies 4-byte pixels from src to dst, exchanging bytes 0 and 2 of each
+  // pixel (the R and B channels) and keeping bytes 1 and 3 where they are.
+  // Returns the number of bytes (not pixels) processed.
+  size_t shorter = (src.len > dst.len) ? dst.len : src.len;
+  size_t num_pixels = shorter / 4;
+  size_t i;
+  for (i = 0; i < num_pixels; i++) {
+    uint8_t* in = src.ptr + (4 * i);
+    uint8_t* out = dst.ptr + (4 * i);
+
+    // Read the whole pixel before writing, so dst and src may be one buffer.
+    uint8_t px[4] = {in[0], in[1], in[2], in[3]};
+    out[0] = px[2];
+    out[1] = px[1];
+    out[2] = px[0];
+    out[3] = px[3];
+  }
+  return num_pixels * 4;
+}
+
+// --------
+
+static uint64_t  // 1 byte per pixel on both sides: a plain slice copy.
+wuffs_base__pixel_swizzler__copy_1_1(wuffs_base__slice_u8 dst,
+                                     wuffs_base__slice_u8 dst_palette,
+                                     wuffs_base__slice_u8 src) {
+  return wuffs_base__slice_u8__copy_from_slice(dst, src);  // Palette unused.
+}
+
+static uint64_t  // Returns the number of whole 3-byte pixels copied.
+wuffs_base__pixel_swizzler__copy_3_3(wuffs_base__slice_u8 dst,
+                                     wuffs_base__slice_u8 dst_palette,
+                                     wuffs_base__slice_u8 src) {
+  size_t dst_pixels = dst.len / 3;
+  size_t num_pixels = src.len / 3;
+  num_pixels = (num_pixels > dst_pixels) ? dst_pixels : num_pixels;
+  if (num_pixels != 0) {  // An empty copy never touches the pointers.
+    memmove(dst.ptr, src.ptr, 3 * num_pixels);
+  }
+  return num_pixels;
+}
+
+static uint64_t  // Returns the number of whole 4-byte pixels copied.
+wuffs_base__pixel_swizzler__copy_4_4(wuffs_base__slice_u8 dst,
+                                     wuffs_base__slice_u8 dst_palette,
+                                     wuffs_base__slice_u8 src) {
+  size_t dst_pixels = dst.len / 4;
+  size_t num_pixels = src.len / 4;
+  num_pixels = (num_pixels > dst_pixels) ? dst_pixels : num_pixels;
+  if (num_pixels != 0) {  // An empty copy never touches the pointers.
+    memmove(dst.ptr, src.ptr, 4 * num_pixels);
+  }
+  return num_pixels;
+}
+
+// --------
+
+static uint64_t  //
+wuffs_base__pixel_swizzler__bgr_565__bgr(wuffs_base__slice_u8 dst,
+                                         wuffs_base__slice_u8 dst_palette,
+                                         wuffs_base__slice_u8 src) {
+  // Converts 3-byte B, G, R pixels (src) to 2-byte little-endian 565 pixels
+  // (dst) by keeping only the high bits of each channel. dst_palette is
+  // unused. Returns the number of pixels converted.
+  size_t dst_pixels = dst.len / 2;
+  size_t src_pixels = src.len / 3;
+  size_t num_pixels = (src_pixels > dst_pixels) ? dst_pixels : src_pixels;
+
+  // TODO: unroll.
+
+  size_t i;
+  for (i = 0; i < num_pixels; i++) {
+    uint8_t* in = src.ptr + (3 * i);
+
+    uint32_t packed = 0;
+    packed |= ((uint32_t)(in[2] >> 3)) << 11;  // Red: 5 bits.
+    packed |= ((uint32_t)(in[1] >> 2)) << 5;   // Green: 6 bits.
+    packed |= ((uint32_t)(in[0] >> 3)) << 0;   // Blue: 5 bits.
+
+    wuffs_base__store_u16le__no_bounds_check(dst.ptr + (2 * i),
+                                             (uint16_t)packed);
+  }
+  return num_pixels;
+}
+
+static uint64_t  //
+wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src(
+    wuffs_base__slice_u8 dst,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src) {
+  // Converts 4-byte non-premultiplied BGRA pixels (src) to 2-byte 565 pixels
+  // (dst), overwriting dst. Each pixel goes through the nonpremul-to-premul
+  // helper and then the premul-to-565 helper. dst_palette is unused. Returns
+  // the number of pixels converted.
+  size_t dst_pixels = dst.len / 2;
+  size_t src_pixels = src.len / 4;
+  size_t num_pixels = (src_pixels > dst_pixels) ? dst_pixels : src_pixels;
+
+  // TODO: unroll.
+
+  size_t i;
+  for (i = 0; i < num_pixels; i++) {
+    uint32_t nonpremul =
+        wuffs_base__load_u32le__no_bounds_check(src.ptr + (4 * i));
+    uint32_t premul =
+        wuffs_base__color_u32_argb_nonpremul__as__color_u32_argb_premul(
+            nonpremul);
+    wuffs_base__store_u16le__no_bounds_check(
+        dst.ptr + (2 * i),
+        wuffs_base__color_u32_argb_premul__as__color_u16_rgb_565(premul));
+  }
+  return num_pixels;
+}
+
+static uint64_t  // Returns the number of pixels blended.
+wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src_over(
+    wuffs_base__slice_u8 dst,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src) {
+  size_t dst_len2 = dst.len / 2;
+  size_t src_len4 = src.len / 4;
+  size_t len = dst_len2 < src_len4 ? dst_len2 : src_len4;
+  uint8_t* d = dst.ptr;
+  uint8_t* s = src.ptr;
+  size_t n = len;
+  // Blends 4-byte non-premul BGRA src pixels over 2-byte 565 dst pixels.
+  // TODO: unroll.
+
+  while (n >= 1) {
+    // Convert from 8-bit color to 16-bit color (0x101 * 0xFF == 0xFFFF).
+    uint32_t sa = 0x101 * ((uint32_t)s[3]);
+    uint32_t sr = 0x101 * ((uint32_t)s[2]);
+    uint32_t sg = 0x101 * ((uint32_t)s[1]);
+    uint32_t sb = 0x101 * ((uint32_t)s[0]);
+
+    // Convert from 565 color to 16-bit color by repeating the 5 or 6 bits.
+    uint32_t old_rgb_565 = wuffs_base__load_u16le__no_bounds_check(d + (0 * 2));
+    uint32_t old_r5 = 0x1F & (old_rgb_565 >> 11);
+    uint32_t dr = (0x8421 * old_r5) >> 4;
+    uint32_t old_g6 = 0x3F & (old_rgb_565 >> 5);
+    uint32_t dg = (0x1041 * old_g6) >> 2;
+    uint32_t old_b5 = 0x1F & (old_rgb_565 >> 0);
+    uint32_t db = (0x8421 * old_b5) >> 4;
+
+    // Calculate the inverse of the src-alpha: how much of the dst to keep.
+    uint32_t ia = 0xFFFF - sa;
+
+    // Composite src (nonpremul) over dst (which carries no alpha channel).
+    dr = ((sr * sa) + (dr * ia)) / 0xFFFF;
+    dg = ((sg * sa) + (dg * ia)) / 0xFFFF;
+    db = ((sb * sa) + (db * ia)) / 0xFFFF;
+
+    // Convert from 16-bit color to 565 color and combine the components.
+    uint32_t new_r5 = 0x1F & (dr >> 11);
+    uint32_t new_g6 = 0x3F & (dg >> 10);
+    uint32_t new_b5 = 0x1F & (db >> 11);
+    uint32_t new_rgb_565 = (new_r5 << 11) | (new_g6 << 5) | (new_b5 << 0);
+    wuffs_base__store_u16le__no_bounds_check(d + (0 * 2),
+                                             (uint16_t)new_rgb_565);
+
+    s += 1 * 4;
+    d += 1 * 2;
+    n -= 1;
+  }
+
+  return len;
+}
+
+static uint64_t  //
+wuffs_base__pixel_swizzler__bgr_565__y(wuffs_base__slice_u8 dst,
+                                       wuffs_base__slice_u8 dst_palette,
+                                       wuffs_base__slice_u8 src) {
+  // Converts 1-byte gray pixels (src) to 2-byte little-endian 565 pixels
+  // (dst), using the gray value for all three channels. dst_palette is
+  // unused. Returns the number of pixels converted.
+  size_t dst_pixels = dst.len / 2;
+  size_t num_pixels = (src.len > dst_pixels) ? dst_pixels : src.len;
+
+  // TODO: unroll.
+
+  size_t i;
+  for (i = 0; i < num_pixels; i++) {
+    uint32_t gray = src.ptr[i];
+    uint32_t hi5 = gray >> 3;
+    uint32_t hi6 = gray >> 2;
+
+    // 5 bits at 11..15, 6 bits at 5..10 and 5 bits at 0..4.
+    uint32_t packed = (hi5 << 11) | (hi6 << 5) | hi5;
+    wuffs_base__store_u16le__no_bounds_check(dst.ptr + (2 * i),
+                                             (uint16_t)packed);
+  }
+  return num_pixels;
+}
+
+static uint64_t  // Returns the number of pixels written.
+wuffs_base__pixel_swizzler__bgr_565__index__src(
+    wuffs_base__slice_u8 dst,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src) {
+  if (dst_palette.len != 1024) {  // Entries are looked up at (index * 4).
+    return 0;
+  }
+  size_t dst_len2 = dst.len / 2;
+  size_t len = dst_len2 < src.len ? dst_len2 : src.len;
+  uint8_t* d = dst.ptr;
+  uint8_t* s = src.ptr;
+  size_t n = len;
+  // Each src byte picks a 4-byte palette entry; a u16le from it is the pixel.
+  const size_t loop_unroll_count = 4;
+  // Main loop: four pixels per iteration.
+  while (n >= loop_unroll_count) {
+    wuffs_base__store_u16le__no_bounds_check(
+        d + (0 * 2), wuffs_base__load_u16le__no_bounds_check(
+                         dst_palette.ptr + ((size_t)s[0] * 4)));
+    wuffs_base__store_u16le__no_bounds_check(
+        d + (1 * 2), wuffs_base__load_u16le__no_bounds_check(
+                         dst_palette.ptr + ((size_t)s[1] * 4)));
+    wuffs_base__store_u16le__no_bounds_check(
+        d + (2 * 2), wuffs_base__load_u16le__no_bounds_check(
+                         dst_palette.ptr + ((size_t)s[2] * 4)));
+    wuffs_base__store_u16le__no_bounds_check(
+        d + (3 * 2), wuffs_base__load_u16le__no_bounds_check(
+                         dst_palette.ptr + ((size_t)s[3] * 4)));
+
+    s += loop_unroll_count * 1;
+    d += loop_unroll_count * 2;
+    n -= loop_unroll_count;
+  }
+  // Tail loop: the remaining (fewer than four) pixels, one at a time.
+  while (n >= 1) {
+    wuffs_base__store_u16le__no_bounds_check(
+        d + (0 * 2), wuffs_base__load_u16le__no_bounds_check(
+                         dst_palette.ptr + ((size_t)s[0] * 4)));
+
+    s += 1 * 1;
+    d += 1 * 2;
+    n -= 1;
+  }
+
+  return len;
+}
+
+static uint64_t  //
+wuffs_base__pixel_swizzler__bgr_565__index_binary_alpha__src_over(
+    wuffs_base__slice_u8 dst,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src) {
+  // Draws palette-indexed src pixels over 2-byte dst pixels. A src pixel
+  // whose 4-byte palette entry is all zeroes leaves its dst pixel untouched;
+  // otherwise the entry's low 16 bits replace the dst pixel. Returns the
+  // number of pixels visited, or 0 if dst_palette is not 1024 bytes long.
+  if (dst_palette.len != 1024) {
+    return 0;
+  }
+  size_t dst_pixels = dst.len / 2;
+  size_t num_pixels = (src.len > dst_pixels) ? dst_pixels : src.len;
+
+  // TODO: unroll.
+
+  size_t i;
+  for (i = 0; i < num_pixels; i++) {
+    size_t offset = 4 * (size_t)src.ptr[i];
+    uint32_t entry =
+        wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr + offset);
+    if (entry == 0) {
+      continue;
+    }
+    wuffs_base__store_u16le__no_bounds_check(dst.ptr + (2 * i),
+                                             (uint16_t)entry);
+  }
+  return num_pixels;
+}
+
+// --------
+
+static uint64_t  //
+wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src(
+    wuffs_base__slice_u8 dst,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src) {
+  // Converts 4-byte non-premultiplied BGRA pixels (src) to 3-byte pixels
+  // (dst), overwriting dst: each pixel goes through the nonpremul-to-premul
+  // helper and is written with the 24-bit little-endian store helper.
+  // dst_palette is unused. Returns the number of pixels converted.
+  size_t dst_pixels = dst.len / 3;
+  size_t src_pixels = src.len / 4;
+  size_t num_pixels = (src_pixels > dst_pixels) ? dst_pixels : src_pixels;
+
+  // TODO: unroll.
+
+  size_t i;
+  for (i = 0; i < num_pixels; i++) {
+    uint32_t nonpremul =
+        wuffs_base__load_u32le__no_bounds_check(src.ptr + (4 * i));
+    uint32_t premul =
+        wuffs_base__color_u32_argb_nonpremul__as__color_u32_argb_premul(
+            nonpremul);
+    wuffs_base__store_u24le__no_bounds_check(dst.ptr + (3 * i), premul);
+  }
+
+  return num_pixels;
+}
+
+static uint64_t  // Returns the number of pixels blended.
+wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src_over(
+    wuffs_base__slice_u8 dst,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src) {
+  size_t dst_len3 = dst.len / 3;
+  size_t src_len4 = src.len / 4;
+  size_t len = dst_len3 < src_len4 ? dst_len3 : src_len4;
+  uint8_t* d = dst.ptr;
+  uint8_t* s = src.ptr;
+  size_t n = len;
+  // Blends 4-byte non-premul BGRA src pixels over 3-byte BGR dst pixels.
+  // TODO: unroll.
+
+  while (n >= 1) {
+    // Convert from 8-bit color to 16-bit color (0x101 * 0xFF == 0xFFFF).
+    uint32_t sa = 0x101 * ((uint32_t)s[3]);
+    uint32_t sr = 0x101 * ((uint32_t)s[2]);
+    uint32_t sg = 0x101 * ((uint32_t)s[1]);
+    uint32_t sb = 0x101 * ((uint32_t)s[0]);
+    uint32_t dr = 0x101 * ((uint32_t)d[2]);
+    uint32_t dg = 0x101 * ((uint32_t)d[1]);
+    uint32_t db = 0x101 * ((uint32_t)d[0]);
+
+    // Calculate the inverse of the src-alpha: how much of the dst to keep.
+    uint32_t ia = 0xFFFF - sa;
+
+    // Composite src (nonpremul) over dst (which carries no alpha channel).
+    dr = ((sr * sa) + (dr * ia)) / 0xFFFF;
+    dg = ((sg * sa) + (dg * ia)) / 0xFFFF;
+    db = ((sb * sa) + (db * ia)) / 0xFFFF;
+
+    // Convert from 16-bit color to 8-bit color by keeping the high byte.
+    d[0] = (uint8_t)(db >> 8);
+    d[1] = (uint8_t)(dg >> 8);
+    d[2] = (uint8_t)(dr >> 8);
+
+    s += 1 * 4;
+    d += 1 * 3;
+    n -= 1;
+  }
+
+  return len;
+}
+
+// --------
+
+static uint64_t  //
+wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul__src_over(
+    wuffs_base__slice_u8 dst,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src) {
+  // Blends 4-byte non-premultiplied BGRA src pixels over 4-byte
+  // non-premultiplied BGRA dst pixels, in place in dst. dst_palette is
+  // unused. Returns the number of pixels blended.
+  size_t dst_pixels = dst.len / 4;
+  size_t src_pixels = src.len / 4;
+  size_t num_pixels = (src_pixels > dst_pixels) ? dst_pixels : src_pixels;
+
+  // TODO: unroll.
+
+  size_t i;
+  for (i = 0; i < num_pixels; i++) {
+    uint8_t* out = dst.ptr + (4 * i);
+
+    // Load dst before src, then store the composite back to dst.
+    uint32_t below = wuffs_base__load_u32le__no_bounds_check(out);
+    uint32_t above =
+        wuffs_base__load_u32le__no_bounds_check(src.ptr + (4 * i));
+    uint32_t blended =
+        wuffs_base__composite_nonpremul_nonpremul_u32_axxx(below, above);
+    wuffs_base__store_u32le__no_bounds_check(out, blended);
+  }
+  return num_pixels;
+}
+
+// --------
+
+static uint64_t  //
+wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src(
+    wuffs_base__slice_u8 dst,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src) {
+  // Converts 4-byte non-premultiplied BGRA pixels (src) to 4-byte
+  // premultiplied BGRA pixels (dst), overwriting dst. The per-pixel work is
+  // done by the nonpremul-to-premul color helper. dst_palette is unused.
+  // Returns the number of pixels converted.
+  size_t dst_pixels = dst.len / 4;
+  size_t src_pixels = src.len / 4;
+  size_t num_pixels = (src_pixels > dst_pixels) ? dst_pixels : src_pixels;
+
+  // TODO: unroll.
+
+  size_t i;
+  for (i = 0; i < num_pixels; i++) {
+    uint32_t nonpremul =
+        wuffs_base__load_u32le__no_bounds_check(src.ptr + (4 * i));
+    uint32_t premul =
+        wuffs_base__color_u32_argb_nonpremul__as__color_u32_argb_premul(
+            nonpremul);
+    wuffs_base__store_u32le__no_bounds_check(dst.ptr + (4 * i), premul);
+  }
+
+  return num_pixels;
+}
+
+static uint64_t  //
+wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src_over(
+    wuffs_base__slice_u8 dst,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src) {
+  // Blends 4-byte non-premultiplied BGRA src pixels over 4-byte
+  // premultiplied BGRA dst pixels, in place in dst. dst_palette is unused.
+  // Returns the number of pixels blended.
+  size_t dst_pixels = dst.len / 4;
+  size_t src_pixels = src.len / 4;
+  size_t num_pixels = (src_pixels > dst_pixels) ? dst_pixels : src_pixels;
+
+  // TODO: unroll.
+
+  size_t i;
+  for (i = 0; i < num_pixels; i++) {
+    uint8_t* out = dst.ptr + (4 * i);
+    // Load dst before src, then store the composite back to dst.
+    uint32_t below = wuffs_base__load_u32le__no_bounds_check(out);
+    uint32_t above =
+        wuffs_base__load_u32le__no_bounds_check(src.ptr + (4 * i));
+    uint32_t blended =
+        wuffs_base__composite_premul_nonpremul_u32_axxx(below, above);
+    wuffs_base__store_u32le__no_bounds_check(out, blended);
+  }
+  return num_pixels;
+}
+
+// --------
+
+static uint64_t  // Returns the number of pixels written.
+wuffs_base__pixel_swizzler__xxx__index__src(wuffs_base__slice_u8 dst,
+                                            wuffs_base__slice_u8 dst_palette,
+                                            wuffs_base__slice_u8 src) {
+  if (dst_palette.len != 1024) {  // Entries are looked up at (index * 4).
+    return 0;
+  }
+  size_t dst_len3 = dst.len / 3;
+  size_t len = dst_len3 < src.len ? dst_len3 : src.len;
+  uint8_t* d = dst.ptr;
+  uint8_t* s = src.ptr;
+  size_t n = len;
+  // Each src byte picks a 4-byte palette entry; dst advances 3 bytes a pixel.
+  const size_t loop_unroll_count = 4;
+
+  // The comparison in the while condition is ">", not ">=", because with
+  // ">=", the last 4-byte store could write past the end of the dst slice.
+  //
+  // Each 4-byte store writes one too many bytes, but a subsequent store
+  // will overwrite that with the correct byte. There is always another
+  // store, whether a 4-byte store in this loop or a 3-byte (one pixel) store
+  // in the next loop.
+  while (n > loop_unroll_count) {
+    wuffs_base__store_u32le__no_bounds_check(
+        d + (0 * 3), wuffs_base__load_u32le__no_bounds_check(
+                         dst_palette.ptr + ((size_t)s[0] * 4)));
+    wuffs_base__store_u32le__no_bounds_check(
+        d + (1 * 3), wuffs_base__load_u32le__no_bounds_check(
+                         dst_palette.ptr + ((size_t)s[1] * 4)));
+    wuffs_base__store_u32le__no_bounds_check(
+        d + (2 * 3), wuffs_base__load_u32le__no_bounds_check(
+                         dst_palette.ptr + ((size_t)s[2] * 4)));
+    wuffs_base__store_u32le__no_bounds_check(
+        d + (3 * 3), wuffs_base__load_u32le__no_bounds_check(
+                         dst_palette.ptr + ((size_t)s[3] * 4)));
+
+    s += loop_unroll_count * 1;
+    d += loop_unroll_count * 3;
+    n -= loop_unroll_count;
+  }
+  // Tail loop: one pixel at a time, using the 24-bit store instead.
+  while (n >= 1) {
+    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
+                                                          ((size_t)s[0] * 4));
+    wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);
+
+    s += 1 * 1;
+    d += 1 * 3;
+    n -= 1;
+  }
+
+  return len;
+}
+
+static uint64_t  // Returns the number of pixels visited.
+wuffs_base__pixel_swizzler__xxx__index_binary_alpha__src_over(
+    wuffs_base__slice_u8 dst,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src) {
+  if (dst_palette.len != 1024) {  // Entries are looked up at (index * 4).
+    return 0;
+  }
+  size_t dst_len3 = dst.len / 3;
+  size_t len = dst_len3 < src.len ? dst_len3 : src.len;
+  uint8_t* d = dst.ptr;
+  uint8_t* s = src.ptr;
+  size_t n = len;
+  // An all-zero palette entry leaves the 3-byte dst pixel as it was.
+  const size_t loop_unroll_count = 4;
+
+  while (n >= loop_unroll_count) {
+    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
+                                                          ((size_t)s[0] * 4));
+    if (s0) {
+      wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);
+    }
+    uint32_t s1 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
+                                                          ((size_t)s[1] * 4));
+    if (s1) {
+      wuffs_base__store_u24le__no_bounds_check(d + (1 * 3), s1);
+    }
+    uint32_t s2 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
+                                                          ((size_t)s[2] * 4));
+    if (s2) {
+      wuffs_base__store_u24le__no_bounds_check(d + (2 * 3), s2);
+    }
+    uint32_t s3 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
+                                                          ((size_t)s[3] * 4));
+    if (s3) {
+      wuffs_base__store_u24le__no_bounds_check(d + (3 * 3), s3);
+    }
+
+    s += loop_unroll_count * 1;
+    d += loop_unroll_count * 3;
+    n -= loop_unroll_count;
+  }
+  // Tail loop: the remaining (fewer than four) pixels, one at a time.
+  while (n >= 1) {
+    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
+                                                          ((size_t)s[0] * 4));
+    if (s0) {
+      wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);
+    }
+
+    s += 1 * 1;
+    d += 1 * 3;
+    n -= 1;
+  }
+
+  return len;
+}
+
+static uint64_t  //
+wuffs_base__pixel_swizzler__xxx__y(wuffs_base__slice_u8 dst,
+                                   wuffs_base__slice_u8 dst_palette,
+                                   wuffs_base__slice_u8 src) {
+  // Expands 1-byte gray pixels (src) to 3-byte pixels (dst) by writing the
+  // gray value to all three bytes. dst_palette is unused. Returns the number
+  // of pixels converted.
+  size_t dst_pixels = dst.len / 3;
+  size_t num_pixels = (src.len > dst_pixels) ? dst_pixels : src.len;
+
+  // TODO: unroll.
+
+  size_t i;
+  for (i = 0; i < num_pixels; i++) {
+    uint8_t gray = src.ptr[i];
+    uint8_t* out = dst.ptr + (3 * i);
+
+    // All three channels receive the same value.
+    out[0] = gray;
+    out[1] = gray;
+    out[2] = gray;
+  }
+
+  return num_pixels;
+}
+
+// --------
+
+static uint64_t  // Returns the number of pixels written.
+wuffs_base__pixel_swizzler__xxxx__index__src(wuffs_base__slice_u8 dst,
+                                             wuffs_base__slice_u8 dst_palette,
+                                             wuffs_base__slice_u8 src) {
+  if (dst_palette.len != 1024) {  // Entries are looked up at (index * 4).
+    return 0;
+  }
+  size_t dst_len4 = dst.len / 4;
+  size_t len = dst_len4 < src.len ? dst_len4 : src.len;
+  uint8_t* d = dst.ptr;
+  uint8_t* s = src.ptr;
+  size_t n = len;
+  // Each src byte picks a 4-byte palette entry, copied whole to dst.
+  const size_t loop_unroll_count = 4;
+  // Main loop: four pixels per iteration.
+  while (n >= loop_unroll_count) {
+    wuffs_base__store_u32le__no_bounds_check(
+        d + (0 * 4), wuffs_base__load_u32le__no_bounds_check(
+                         dst_palette.ptr + ((size_t)s[0] * 4)));
+    wuffs_base__store_u32le__no_bounds_check(
+        d + (1 * 4), wuffs_base__load_u32le__no_bounds_check(
+                         dst_palette.ptr + ((size_t)s[1] * 4)));
+    wuffs_base__store_u32le__no_bounds_check(
+        d + (2 * 4), wuffs_base__load_u32le__no_bounds_check(
+                         dst_palette.ptr + ((size_t)s[2] * 4)));
+    wuffs_base__store_u32le__no_bounds_check(
+        d + (3 * 4), wuffs_base__load_u32le__no_bounds_check(
+                         dst_palette.ptr + ((size_t)s[3] * 4)));
+
+    s += loop_unroll_count * 1;
+    d += loop_unroll_count * 4;
+    n -= loop_unroll_count;
+  }
+  // Tail loop: the remaining (fewer than four) pixels, one at a time.
+  while (n >= 1) {
+    wuffs_base__store_u32le__no_bounds_check(
+        d + (0 * 4), wuffs_base__load_u32le__no_bounds_check(
+                         dst_palette.ptr + ((size_t)s[0] * 4)));
+
+    s += 1 * 1;
+    d += 1 * 4;
+    n -= 1;
+  }
+
+  return len;
+}
+
+static uint64_t  // Returns the number of pixels visited.
+wuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over(
+    wuffs_base__slice_u8 dst,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src) {
+  if (dst_palette.len != 1024) {  // Entries are looked up at (index * 4).
+    return 0;
+  }
+  size_t dst_len4 = dst.len / 4;
+  size_t len = dst_len4 < src.len ? dst_len4 : src.len;
+  uint8_t* d = dst.ptr;
+  uint8_t* s = src.ptr;
+  size_t n = len;
+  // An all-zero palette entry leaves the 4-byte dst pixel as it was.
+  const size_t loop_unroll_count = 4;
+
+  while (n >= loop_unroll_count) {
+    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
+                                                          ((size_t)s[0] * 4));
+    if (s0) {
+      wuffs_base__store_u32le__no_bounds_check(d + (0 * 4), s0);
+    }
+    uint32_t s1 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
+                                                          ((size_t)s[1] * 4));
+    if (s1) {
+      wuffs_base__store_u32le__no_bounds_check(d + (1 * 4), s1);
+    }
+    uint32_t s2 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
+                                                          ((size_t)s[2] * 4));
+    if (s2) {
+      wuffs_base__store_u32le__no_bounds_check(d + (2 * 4), s2);
+    }
+    uint32_t s3 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
+                                                          ((size_t)s[3] * 4));
+    if (s3) {
+      wuffs_base__store_u32le__no_bounds_check(d + (3 * 4), s3);
+    }
+
+    s += loop_unroll_count * 1;
+    d += loop_unroll_count * 4;
+    n -= loop_unroll_count;
+  }
+  // Tail loop: the remaining (fewer than four) pixels, one at a time.
+  while (n >= 1) {
+    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
+                                                          ((size_t)s[0] * 4));
+    if (s0) {
+      wuffs_base__store_u32le__no_bounds_check(d + (0 * 4), s0);
+    }
+
+    s += 1 * 1;
+    d += 1 * 4;
+    n -= 1;
+  }
+
+  return len;
+}
+
+static uint64_t  //
+wuffs_base__pixel_swizzler__xxxx__xxx(wuffs_base__slice_u8 dst,
+                                      wuffs_base__slice_u8 dst_palette,
+                                      wuffs_base__slice_u8 src) {
+  // Expands 3-byte pixels (src) to 4-byte pixels (dst): each src pixel is
+  // loaded as a 24-bit little-endian value and stored with 0xFF in the top
+  // byte. dst_palette is unused. Returns the number of pixels converted.
+  size_t dst_pixels = dst.len / 4;
+  size_t src_pixels = src.len / 3;
+  size_t num_pixels = (src_pixels > dst_pixels) ? dst_pixels : src_pixels;
+
+  // TODO: unroll.
+
+  size_t i;
+  for (i = 0; i < num_pixels; i++) {
+    uint32_t color24 =
+        wuffs_base__load_u24le__no_bounds_check(src.ptr + (3 * i));
+
+    // The fourth byte of every dst pixel is forced to 0xFF.
+    wuffs_base__store_u32le__no_bounds_check(dst.ptr + (4 * i),
+                                             0xFF000000 | color24);
+  }
+
+  return num_pixels;
+}
+
+static uint64_t  //
+wuffs_base__pixel_swizzler__xxxx__y(wuffs_base__slice_u8 dst,
+                                    wuffs_base__slice_u8 dst_palette,
+                                    wuffs_base__slice_u8 src) {
+  // Expands 1-byte gray pixels (src) to 4-byte pixels (dst): the gray value
+  // fills the low three bytes and the top byte is set to 0xFF. dst_palette
+  // is unused. Returns the number of pixels converted.
+  size_t dst_pixels = dst.len / 4;
+  size_t num_pixels = (src.len > dst_pixels) ? dst_pixels : src.len;
+
+  // TODO: unroll.
+
+  size_t i;
+  for (i = 0; i < num_pixels; i++) {
+    uint32_t gray = (uint32_t)src.ptr[i];
+
+    // Multiplying by 0x010101 repeats the byte across bits 0..23.
+    uint32_t color = 0xFF000000 | (0x010101 * gray);
+    wuffs_base__store_u32le__no_bounds_check(dst.ptr + (4 * i), color);
+  }
+
+  return num_pixels;
+}
+
+// --------
+
+static wuffs_base__pixel_swizzler__func  // NULL means unsupported dst_format.
+wuffs_base__pixel_swizzler__prepare__y(wuffs_base__pixel_swizzler* p,
+                                       wuffs_base__pixel_format dst_format,
+                                       wuffs_base__slice_u8 dst_palette,
+                                       wuffs_base__slice_u8 src_palette,
+                                       wuffs_base__pixel_blend blend) {
+  switch (dst_format.repr) {  // Only dst_format is consulted here.
+    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
+      return wuffs_base__pixel_swizzler__bgr_565__y;
+    // One function serves both 3-channel orders.
+    case WUFFS_BASE__PIXEL_FORMAT__BGR:
+    case WUFFS_BASE__PIXEL_FORMAT__RGB:
+      return wuffs_base__pixel_swizzler__xxx__y;
+    // One function serves every listed 4-channel layout.
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRX:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBX:
+      return wuffs_base__pixel_swizzler__xxxx__y;
+  }
+  return NULL;
+}
+
+static wuffs_base__pixel_swizzler__func  // NULL means unsupported.
+wuffs_base__pixel_swizzler__prepare__indexed__bgra_binary(
+    wuffs_base__pixel_swizzler* p,
+    wuffs_base__pixel_format dst_format,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src_palette,
+    wuffs_base__pixel_blend blend) {
+  switch (dst_format.repr) {  // Each case fills dst_palette, then picks a func.
+    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_NONPREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_PREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY:
+      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=
+          1024) {
+        return NULL;
+      }
+      switch (blend) {
+        case WUFFS_BASE__PIXEL_BLEND__SRC:
+          return wuffs_base__pixel_swizzler__copy_1_1;
+      }
+      return NULL;  // Unsupported blend; dst_palette was still overwritten.
+    // 565 dst: the palette is converted by the squash helper up front.
+    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
+      if (wuffs_base__pixel_swizzler__squash_bgr_565_888(dst_palette,
+                                                         src_palette) != 1024) {
+        return NULL;
+      }
+      switch (blend) {
+        case WUFFS_BASE__PIXEL_BLEND__SRC:
+          return wuffs_base__pixel_swizzler__bgr_565__index__src;
+        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
+          return wuffs_base__pixel_swizzler__bgr_565__index_binary_alpha__src_over;
+      }
+      return NULL;
+    // BGR dst: the palette is copied unchanged.
+    case WUFFS_BASE__PIXEL_FORMAT__BGR:
+      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=
+          1024) {
+        return NULL;
+      }
+      switch (blend) {
+        case WUFFS_BASE__PIXEL_BLEND__SRC:
+          return wuffs_base__pixel_swizzler__xxx__index__src;
+        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
+          return wuffs_base__pixel_swizzler__xxx__index_binary_alpha__src_over;
+      }
+      return NULL;
+    // BGRA-ordered 4-byte dst: the palette is copied unchanged.
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:
+      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=
+          1024) {
+        return NULL;
+      }
+      switch (blend) {
+        case WUFFS_BASE__PIXEL_BLEND__SRC:
+          return wuffs_base__pixel_swizzler__xxxx__index__src;
+        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
+          return wuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over;
+      }
+      return NULL;
+    // RGB dst: the palette goes through the R/B swap helper up front.
+    case WUFFS_BASE__PIXEL_FORMAT__RGB:
+      if (wuffs_base__pixel_swizzler__swap_rgbx_bgrx(dst_palette,
+                                                     src_palette) != 1024) {
+        return NULL;
+      }
+      switch (blend) {
+        case WUFFS_BASE__PIXEL_BLEND__SRC:
+          return wuffs_base__pixel_swizzler__xxx__index__src;
+        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
+          return wuffs_base__pixel_swizzler__xxx__index_binary_alpha__src_over;
+      }
+      return NULL;
+    // RGBA-ordered 4-byte dst: the palette goes through the R/B swap helper.
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:
+      if (wuffs_base__pixel_swizzler__swap_rgbx_bgrx(dst_palette,
+                                                     src_palette) != 1024) {
+        return NULL;
+      }
+      switch (blend) {
+        case WUFFS_BASE__PIXEL_BLEND__SRC:
+          return wuffs_base__pixel_swizzler__xxxx__index__src;
+        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
+          return wuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over;
+      }
+      return NULL;
+  }
+  return NULL;
+}
+
+static wuffs_base__pixel_swizzler__func  // NULL means unsupported dst_format.
+wuffs_base__pixel_swizzler__prepare__bgr(wuffs_base__pixel_swizzler* p,
+                                         wuffs_base__pixel_format dst_format,
+                                         wuffs_base__slice_u8 dst_palette,
+                                         wuffs_base__slice_u8 src_palette,
+                                         wuffs_base__pixel_blend blend) {
+  switch (dst_format.repr) {  // Only dst_format is consulted here.
+    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
+      return wuffs_base__pixel_swizzler__bgr_565__bgr;
+    // Same format on both sides: a straight 3-bytes-per-pixel copy.
+    case WUFFS_BASE__PIXEL_FORMAT__BGR:
+      return wuffs_base__pixel_swizzler__copy_3_3;
+    // BGR-ordered 4-byte dst formats all share one widening function.
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRX:
+      return wuffs_base__pixel_swizzler__xxxx__xxx;
+    // RGB-ordered dst formats are not supported yet: they yield NULL below.
+    case WUFFS_BASE__PIXEL_FORMAT__RGB:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBX:
+      // TODO.
+      break;
+  }
+  return NULL;
+}
+
+static wuffs_base__pixel_swizzler__func  // NULL means unsupported.
+wuffs_base__pixel_swizzler__prepare__bgra_nonpremul(
+    wuffs_base__pixel_swizzler* p,
+    wuffs_base__pixel_format dst_format,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src_palette,
+    wuffs_base__pixel_blend blend) {
+  switch (dst_format.repr) {  // Outer: dst format. Inner: blend mode.
+    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
+      switch (blend) {
+        case WUFFS_BASE__PIXEL_BLEND__SRC:
+          return wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src;
+        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
+          return wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src_over;
+      }
+      return NULL;
+    // 3-byte BGR dst.
+    case WUFFS_BASE__PIXEL_FORMAT__BGR:
+      switch (blend) {
+        case WUFFS_BASE__PIXEL_BLEND__SRC:
+          return wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src;
+        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
+          return wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src_over;
+      }
+      return NULL;
+    // Same format on both sides: SRC is a straight 4-bytes-per-pixel copy.
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
+      switch (blend) {
+        case WUFFS_BASE__PIXEL_BLEND__SRC:
+          return wuffs_base__pixel_swizzler__copy_4_4;
+        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
+          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul__src_over;
+      }
+      return NULL;
+    // Premultiplied BGRA dst.
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
+      switch (blend) {
+        case WUFFS_BASE__PIXEL_BLEND__SRC:
+          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src;
+        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
+          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src_over;
+      }
+      return NULL;
+    // Not supported yet: these yield NULL below.
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRX:
+      // TODO.
+      break;
+    // RGB-ordered dst formats are not supported yet either.
+    case WUFFS_BASE__PIXEL_FORMAT__RGB:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBX:
+      // TODO.
+      break;
+  }
+  return NULL;
+}
+
+// --------
+
+wuffs_base__status  // Error if p is NULL or the combination is unsupported.
+wuffs_base__pixel_swizzler__prepare(wuffs_base__pixel_swizzler* p,
+                                    wuffs_base__pixel_format dst_format,
+                                    wuffs_base__slice_u8 dst_palette,
+                                    wuffs_base__pixel_format src_format,
+                                    wuffs_base__slice_u8 src_palette,
+                                    wuffs_base__pixel_blend blend) {
+  if (!p) {
+    return wuffs_base__make_status(wuffs_base__error__bad_receiver);
+  }
+
+  // TODO: support many more formats.
+
+  wuffs_base__pixel_swizzler__func func = NULL;
+  // Dispatch on the source format; each helper then dispatches on dst_format.
+  switch (src_format.repr) {
+    case WUFFS_BASE__PIXEL_FORMAT__Y:
+      func = wuffs_base__pixel_swizzler__prepare__y(p, dst_format, dst_palette,
+                                                    src_palette, blend);
+      break;
+
+    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY:
+      func = wuffs_base__pixel_swizzler__prepare__indexed__bgra_binary(
+          p, dst_format, dst_palette, src_palette, blend);
+      break;
+
+    case WUFFS_BASE__PIXEL_FORMAT__BGR:
+      func = wuffs_base__pixel_swizzler__prepare__bgr(
+          p, dst_format, dst_palette, src_palette, blend);
+      break;
+
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
+      func = wuffs_base__pixel_swizzler__prepare__bgra_nonpremul(
+          p, dst_format, dst_palette, src_palette, blend);
+      break;
+  }
+  // func is stored even when NULL, replacing any previously prepared function.
+  p->private_impl.func = func;
+  return wuffs_base__make_status(
+      func ? NULL : wuffs_base__error__unsupported_pixel_swizzler_option);
+}
+
+uint64_t  // Returns 0 when p is NULL or holds no prepared function.
+wuffs_base__pixel_swizzler__swizzle_interleaved(
+    const wuffs_base__pixel_swizzler* p,
+    wuffs_base__slice_u8 dst,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src) {
+  if (!p || !p->private_impl.func) {
+    return 0;
+  }
+  return (*p->private_impl.func)(dst, dst_palette, src);
+}
diff --git a/internal/cgen/base/strconv-impl.c b/internal/cgen/base/strconv-impl.c
index 06710a1..9d29040 100644
--- a/internal/cgen/base/strconv-impl.c
+++ b/internal/cgen/base/strconv-impl.c
@@ -299,1255 +299,6 @@
   } while (0);
 }
 
-  // ---------------- IEEE 754 Floating Point
-
-#define WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE 1023
-#define WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION 500
-
-// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL is the largest N
-// such that ((10 << N) < (1 << 64)).
-#define WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL 60
-
-// wuffs_base__private_implementation__high_prec_dec (abbreviated as HPD) is a
-// fixed precision floating point decimal number, augmented with ±infinity
-// values, but it cannot represent NaN (Not a Number).
-//
-// "High precision" means that the mantissa holds 500 decimal digits. 500 is
-// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION.
-//
-// An HPD isn't for general purpose arithmetic, only for conversions to and
-// from IEEE 754 double-precision floating point, where the largest and
-// smallest positive, finite values are approximately 1.8e+308 and 4.9e-324.
-// HPD exponents above +1023 mean infinity, below -1023 mean zero. The ±1023
-// bounds are further away from zero than ±(324 + 500), where 500 and 1023 is
-// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION and
-// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE.
-//
-// digits[.. num_digits] are the number's digits in big-endian order. The
-// uint8_t values are in the range [0 ..= 9], not ['0' ..= '9'], where e.g. '7'
-// is the ASCII value 0x37.
-//
-// decimal_point is the index (within digits) of the decimal point. It may be
-// negative or be larger than num_digits, in which case the explicit digits are
-// padded with implicit zeroes.
-//
-// For example, if num_digits is 3 and digits is "\x07\x08\x09":
-//   - A decimal_point of -2 means ".00789"
-//   - A decimal_point of -1 means ".0789"
-//   - A decimal_point of +0 means ".789"
-//   - A decimal_point of +1 means "7.89"
-//   - A decimal_point of +2 means "78.9"
-//   - A decimal_point of +3 means "789."
-//   - A decimal_point of +4 means "7890."
-//   - A decimal_point of +5 means "78900."
-//
-// As above, a decimal_point higher than +1023 means that the overall value is
-// infinity, lower than -1023 means zero.
-//
-// negative is a sign bit. An HPD can distinguish positive and negative zero.
-//
-// truncated is whether there are more than
-// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION digits, and at
-// least one of those extra digits are non-zero. The existence of long-tail
-// digits can affect rounding.
-//
-// The "all fields are zero" value is valid, and represents the number +0.
-typedef struct {
-  uint32_t num_digits;
-  int32_t decimal_point;
-  bool negative;
-  bool truncated;
-  uint8_t digits[WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION];
-} wuffs_base__private_implementation__high_prec_dec;
-
-// wuffs_base__private_implementation__high_prec_dec__trim trims trailing
-// zeroes from the h->digits[.. h->num_digits] slice. They have no benefit,
-// since we explicitly track h->decimal_point.
-//
-// Preconditions:
-//  - h is non-NULL.
-static inline void  //
-wuffs_base__private_implementation__high_prec_dec__trim(
-    wuffs_base__private_implementation__high_prec_dec* h) {
-  while ((h->num_digits > 0) && (h->digits[h->num_digits - 1] == 0)) {
-    h->num_digits--;
-  }
-}
-
-static wuffs_base__status  //
-wuffs_base__private_implementation__high_prec_dec__parse(
-    wuffs_base__private_implementation__high_prec_dec* h,
-    wuffs_base__slice_u8 s) {
-  if (!h) {
-    return wuffs_base__make_status(wuffs_base__error__bad_receiver);
-  }
-  h->num_digits = 0;
-  h->decimal_point = 0;
-  h->negative = false;
-  h->truncated = false;
-
-  uint8_t* p = s.ptr;
-  uint8_t* q = s.ptr + s.len;
-
-  for (; (p < q) && (*p == '_'); p++) {
-  }
-  if (p >= q) {
-    return wuffs_base__make_status(wuffs_base__error__bad_argument);
-  }
-
-  // Parse sign.
-  do {
-    if (*p == '+') {
-      p++;
-    } else if (*p == '-') {
-      h->negative = true;
-      p++;
-    } else {
-      break;
-    }
-    for (; (p < q) && (*p == '_'); p++) {
-    }
-  } while (0);
-
-  // Parse digits.
-  uint32_t nd = 0;
-  int32_t dp = 0;
-  bool saw_digits = false;
-  bool saw_non_zero_digits = false;
-  bool saw_dot = false;
-  for (; p < q; p++) {
-    if (*p == '_') {
-      // No-op.
-
-    } else if ((*p == '.') || (*p == ',')) {
-      // As per https://en.wikipedia.org/wiki/Decimal_separator, both '.' or
-      // ',' are commonly used. We just parse either, regardless of LOCALE.
-      if (saw_dot) {
-        return wuffs_base__make_status(wuffs_base__error__bad_argument);
-      }
-      saw_dot = true;
-      dp = (int32_t)nd;
-
-    } else if ('0' == *p) {
-      if (!saw_dot && !saw_non_zero_digits && saw_digits) {
-        // We don't allow unnecessary leading zeroes: "000123" or "0644".
-        return wuffs_base__make_status(wuffs_base__error__bad_argument);
-      }
-      saw_digits = true;
-      if (nd == 0) {
-        // Track leading zeroes implicitly.
-        dp--;
-      } else if (nd <
-                 WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
-        h->digits[nd++] = 0;
-      } else {
-        // Long-tail zeroes are ignored.
-      }
-
-    } else if (('0' < *p) && (*p <= '9')) {
-      if (!saw_dot && !saw_non_zero_digits && saw_digits) {
-        // We don't allow unnecessary leading zeroes: "000123" or "0644".
-        return wuffs_base__make_status(wuffs_base__error__bad_argument);
-      }
-      saw_digits = true;
-      saw_non_zero_digits = true;
-      if (nd < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
-        h->digits[nd++] = (uint8_t)(*p - '0');
-      } else {
-        // Long-tail non-zeroes set the truncated bit.
-        h->truncated = true;
-      }
-
-    } else {
-      break;
-    }
-  }
-
-  if (!saw_digits) {
-    return wuffs_base__make_status(wuffs_base__error__bad_argument);
-  }
-  if (!saw_dot) {
-    dp = (int32_t)nd;
-  }
-
-  // Parse exponent.
-  if ((p < q) && ((*p == 'E') || (*p == 'e'))) {
-    p++;
-    for (; (p < q) && (*p == '_'); p++) {
-    }
-    if (p >= q) {
-      return wuffs_base__make_status(wuffs_base__error__bad_argument);
-    }
-
-    int32_t exp_sign = +1;
-    if (*p == '+') {
-      p++;
-    } else if (*p == '-') {
-      exp_sign = -1;
-      p++;
-    }
-
-    int32_t exp = 0;
-    const int32_t exp_large =
-        WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE +
-        WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION;
-    bool saw_exp_digits = false;
-    for (; p < q; p++) {
-      if (*p == '_') {
-        // No-op.
-      } else if (('0' <= *p) && (*p <= '9')) {
-        saw_exp_digits = true;
-        if (exp < exp_large) {
-          exp = (10 * exp) + ((int32_t)(*p - '0'));
-        }
-      } else {
-        break;
-      }
-    }
-    if (!saw_exp_digits) {
-      return wuffs_base__make_status(wuffs_base__error__bad_argument);
-    }
-    dp += exp_sign * exp;
-  }
-
-  // Finish.
-  if (p != q) {
-    return wuffs_base__make_status(wuffs_base__error__bad_argument);
-  }
-  h->num_digits = nd;
-  if (nd == 0) {
-    h->decimal_point = 0;
-  } else if (dp <
-             -WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {
-    h->decimal_point =
-        -WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE - 1;
-  } else if (dp >
-             +WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {
-    h->decimal_point =
-        +WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE + 1;
-  } else {
-    h->decimal_point = dp;
-  }
-  wuffs_base__private_implementation__high_prec_dec__trim(h);
-  return wuffs_base__make_status(NULL);
-}
-
-// --------
-
-// The etc__hpd_left_shift and etc__powers_of_5 tables were printed by
-// script/print-hpd-left-shift.go. That script has an optional -comments flag,
-// whose output is not copied here, which prints further detail.
-//
-// These tables are used in
-// wuffs_base__private_implementation__high_prec_dec__lshift_num_new_digits.
-
-// wuffs_base__private_implementation__hpd_left_shift[i] encodes the number of
-// new digits created after multiplying a positive integer by (1 << i): the
-// additional length in the decimal representation. For example, shifting "234"
-// by 3 (equivalent to multiplying by 8) will produce "1872". Going from a
-// 3-length string to a 4-length string means that 1 new digit was added (and
-// existing digits may have changed).
-//
-// Shifting by i can add either N or N-1 new digits, depending on whether the
-// original positive integer compares >= or < to the i'th power of 5 (as 10
-// equals 2 * 5). Comparison is lexicographic, not numerical.
-//
-// For example, shifting by 4 (i.e. multiplying by 16) can add 1 or 2 new
-// digits, depending on a lexicographic comparison to (5 ** 4), i.e. "625":
-//  - ("1"      << 4) is "16",       which adds 1 new digit.
-//  - ("5678"   << 4) is "90848",    which adds 1 new digit.
-//  - ("624"    << 4) is "9984",     which adds 1 new digit.
-//  - ("62498"  << 4) is "999968",   which adds 1 new digit.
-//  - ("625"    << 4) is "10000",    which adds 2 new digits.
-//  - ("625001" << 4) is "10000016", which adds 2 new digits.
-//  - ("7008"   << 4) is "112128",   which adds 2 new digits.
-//  - ("99"     << 4) is "1584",     which adds 2 new digits.
-//
-// Thus, when i is 4, N is 2 and (5 ** i) is "625". This etc__hpd_left_shift
-// array encodes this as:
-//  - etc__hpd_left_shift[4] is 0x1006 = (2 << 11) | 0x0006.
-//  - etc__hpd_left_shift[5] is 0x1009 = (? << 11) | 0x0009.
-// where the ? isn't relevant for i == 4.
-//
-// The high 5 bits of etc__hpd_left_shift[i] is N, the higher of the two
-// possible number of new digits. The low 11 bits are an offset into the
-// etc__powers_of_5 array (of length 0x051C, so offsets fit in 11 bits). When i
-// is 4, its offset and the next one is 6 and 9, and etc__powers_of_5[6 .. 9]
-// is the string "\x06\x02\x05", so the relevant power of 5 is "625".
-//
-// Thanks to Ken Thompson for the original idea.
-static const uint16_t wuffs_base__private_implementation__hpd_left_shift[65] = {
-    0x0000, 0x0800, 0x0801, 0x0803, 0x1006, 0x1009, 0x100D, 0x1812, 0x1817,
-    0x181D, 0x2024, 0x202B, 0x2033, 0x203C, 0x2846, 0x2850, 0x285B, 0x3067,
-    0x3073, 0x3080, 0x388E, 0x389C, 0x38AB, 0x38BB, 0x40CC, 0x40DD, 0x40EF,
-    0x4902, 0x4915, 0x4929, 0x513E, 0x5153, 0x5169, 0x5180, 0x5998, 0x59B0,
-    0x59C9, 0x61E3, 0x61FD, 0x6218, 0x6A34, 0x6A50, 0x6A6D, 0x6A8B, 0x72AA,
-    0x72C9, 0x72E9, 0x7B0A, 0x7B2B, 0x7B4D, 0x8370, 0x8393, 0x83B7, 0x83DC,
-    0x8C02, 0x8C28, 0x8C4F, 0x9477, 0x949F, 0x94C8, 0x9CF2, 0x051C, 0x051C,
-    0x051C, 0x051C,
-};
-
-// wuffs_base__private_implementation__powers_of_5 contains the powers of 5,
-// concatenated together: "5", "25", "125", "625", "3125", etc.
-static const uint8_t wuffs_base__private_implementation__powers_of_5[0x051C] = {
-    5, 2, 5, 1, 2, 5, 6, 2, 5, 3, 1, 2, 5, 1, 5, 6, 2, 5, 7, 8, 1, 2, 5, 3, 9,
-    0, 6, 2, 5, 1, 9, 5, 3, 1, 2, 5, 9, 7, 6, 5, 6, 2, 5, 4, 8, 8, 2, 8, 1, 2,
-    5, 2, 4, 4, 1, 4, 0, 6, 2, 5, 1, 2, 2, 0, 7, 0, 3, 1, 2, 5, 6, 1, 0, 3, 5,
-    1, 5, 6, 2, 5, 3, 0, 5, 1, 7, 5, 7, 8, 1, 2, 5, 1, 5, 2, 5, 8, 7, 8, 9, 0,
-    6, 2, 5, 7, 6, 2, 9, 3, 9, 4, 5, 3, 1, 2, 5, 3, 8, 1, 4, 6, 9, 7, 2, 6, 5,
-    6, 2, 5, 1, 9, 0, 7, 3, 4, 8, 6, 3, 2, 8, 1, 2, 5, 9, 5, 3, 6, 7, 4, 3, 1,
-    6, 4, 0, 6, 2, 5, 4, 7, 6, 8, 3, 7, 1, 5, 8, 2, 0, 3, 1, 2, 5, 2, 3, 8, 4,
-    1, 8, 5, 7, 9, 1, 0, 1, 5, 6, 2, 5, 1, 1, 9, 2, 0, 9, 2, 8, 9, 5, 5, 0, 7,
-    8, 1, 2, 5, 5, 9, 6, 0, 4, 6, 4, 4, 7, 7, 5, 3, 9, 0, 6, 2, 5, 2, 9, 8, 0,
-    2, 3, 2, 2, 3, 8, 7, 6, 9, 5, 3, 1, 2, 5, 1, 4, 9, 0, 1, 1, 6, 1, 1, 9, 3,
-    8, 4, 7, 6, 5, 6, 2, 5, 7, 4, 5, 0, 5, 8, 0, 5, 9, 6, 9, 2, 3, 8, 2, 8, 1,
-    2, 5, 3, 7, 2, 5, 2, 9, 0, 2, 9, 8, 4, 6, 1, 9, 1, 4, 0, 6, 2, 5, 1, 8, 6,
-    2, 6, 4, 5, 1, 4, 9, 2, 3, 0, 9, 5, 7, 0, 3, 1, 2, 5, 9, 3, 1, 3, 2, 2, 5,
-    7, 4, 6, 1, 5, 4, 7, 8, 5, 1, 5, 6, 2, 5, 4, 6, 5, 6, 6, 1, 2, 8, 7, 3, 0,
-    7, 7, 3, 9, 2, 5, 7, 8, 1, 2, 5, 2, 3, 2, 8, 3, 0, 6, 4, 3, 6, 5, 3, 8, 6,
-    9, 6, 2, 8, 9, 0, 6, 2, 5, 1, 1, 6, 4, 1, 5, 3, 2, 1, 8, 2, 6, 9, 3, 4, 8,
-    1, 4, 4, 5, 3, 1, 2, 5, 5, 8, 2, 0, 7, 6, 6, 0, 9, 1, 3, 4, 6, 7, 4, 0, 7,
-    2, 2, 6, 5, 6, 2, 5, 2, 9, 1, 0, 3, 8, 3, 0, 4, 5, 6, 7, 3, 3, 7, 0, 3, 6,
-    1, 3, 2, 8, 1, 2, 5, 1, 4, 5, 5, 1, 9, 1, 5, 2, 2, 8, 3, 6, 6, 8, 5, 1, 8,
-    0, 6, 6, 4, 0, 6, 2, 5, 7, 2, 7, 5, 9, 5, 7, 6, 1, 4, 1, 8, 3, 4, 2, 5, 9,
-    0, 3, 3, 2, 0, 3, 1, 2, 5, 3, 6, 3, 7, 9, 7, 8, 8, 0, 7, 0, 9, 1, 7, 1, 2,
-    9, 5, 1, 6, 6, 0, 1, 5, 6, 2, 5, 1, 8, 1, 8, 9, 8, 9, 4, 0, 3, 5, 4, 5, 8,
-    5, 6, 4, 7, 5, 8, 3, 0, 0, 7, 8, 1, 2, 5, 9, 0, 9, 4, 9, 4, 7, 0, 1, 7, 7,
-    2, 9, 2, 8, 2, 3, 7, 9, 1, 5, 0, 3, 9, 0, 6, 2, 5, 4, 5, 4, 7, 4, 7, 3, 5,
-    0, 8, 8, 6, 4, 6, 4, 1, 1, 8, 9, 5, 7, 5, 1, 9, 5, 3, 1, 2, 5, 2, 2, 7, 3,
-    7, 3, 6, 7, 5, 4, 4, 3, 2, 3, 2, 0, 5, 9, 4, 7, 8, 7, 5, 9, 7, 6, 5, 6, 2,
-    5, 1, 1, 3, 6, 8, 6, 8, 3, 7, 7, 2, 1, 6, 1, 6, 0, 2, 9, 7, 3, 9, 3, 7, 9,
-    8, 8, 2, 8, 1, 2, 5, 5, 6, 8, 4, 3, 4, 1, 8, 8, 6, 0, 8, 0, 8, 0, 1, 4, 8,
-    6, 9, 6, 8, 9, 9, 4, 1, 4, 0, 6, 2, 5, 2, 8, 4, 2, 1, 7, 0, 9, 4, 3, 0, 4,
-    0, 4, 0, 0, 7, 4, 3, 4, 8, 4, 4, 9, 7, 0, 7, 0, 3, 1, 2, 5, 1, 4, 2, 1, 0,
-    8, 5, 4, 7, 1, 5, 2, 0, 2, 0, 0, 3, 7, 1, 7, 4, 2, 2, 4, 8, 5, 3, 5, 1, 5,
-    6, 2, 5, 7, 1, 0, 5, 4, 2, 7, 3, 5, 7, 6, 0, 1, 0, 0, 1, 8, 5, 8, 7, 1, 1,
-    2, 4, 2, 6, 7, 5, 7, 8, 1, 2, 5, 3, 5, 5, 2, 7, 1, 3, 6, 7, 8, 8, 0, 0, 5,
-    0, 0, 9, 2, 9, 3, 5, 5, 6, 2, 1, 3, 3, 7, 8, 9, 0, 6, 2, 5, 1, 7, 7, 6, 3,
-    5, 6, 8, 3, 9, 4, 0, 0, 2, 5, 0, 4, 6, 4, 6, 7, 7, 8, 1, 0, 6, 6, 8, 9, 4,
-    5, 3, 1, 2, 5, 8, 8, 8, 1, 7, 8, 4, 1, 9, 7, 0, 0, 1, 2, 5, 2, 3, 2, 3, 3,
-    8, 9, 0, 5, 3, 3, 4, 4, 7, 2, 6, 5, 6, 2, 5, 4, 4, 4, 0, 8, 9, 2, 0, 9, 8,
-    5, 0, 0, 6, 2, 6, 1, 6, 1, 6, 9, 4, 5, 2, 6, 6, 7, 2, 3, 6, 3, 2, 8, 1, 2,
-    5, 2, 2, 2, 0, 4, 4, 6, 0, 4, 9, 2, 5, 0, 3, 1, 3, 0, 8, 0, 8, 4, 7, 2, 6,
-    3, 3, 3, 6, 1, 8, 1, 6, 4, 0, 6, 2, 5, 1, 1, 1, 0, 2, 2, 3, 0, 2, 4, 6, 2,
-    5, 1, 5, 6, 5, 4, 0, 4, 2, 3, 6, 3, 1, 6, 6, 8, 0, 9, 0, 8, 2, 0, 3, 1, 2,
-    5, 5, 5, 5, 1, 1, 1, 5, 1, 2, 3, 1, 2, 5, 7, 8, 2, 7, 0, 2, 1, 1, 8, 1, 5,
-    8, 3, 4, 0, 4, 5, 4, 1, 0, 1, 5, 6, 2, 5, 2, 7, 7, 5, 5, 5, 7, 5, 6, 1, 5,
-    6, 2, 8, 9, 1, 3, 5, 1, 0, 5, 9, 0, 7, 9, 1, 7, 0, 2, 2, 7, 0, 5, 0, 7, 8,
-    1, 2, 5, 1, 3, 8, 7, 7, 7, 8, 7, 8, 0, 7, 8, 1, 4, 4, 5, 6, 7, 5, 5, 2, 9,
-    5, 3, 9, 5, 8, 5, 1, 1, 3, 5, 2, 5, 3, 9, 0, 6, 2, 5, 6, 9, 3, 8, 8, 9, 3,
-    9, 0, 3, 9, 0, 7, 2, 2, 8, 3, 7, 7, 6, 4, 7, 6, 9, 7, 9, 2, 5, 5, 6, 7, 6,
-    2, 6, 9, 5, 3, 1, 2, 5, 3, 4, 6, 9, 4, 4, 6, 9, 5, 1, 9, 5, 3, 6, 1, 4, 1,
-    8, 8, 8, 2, 3, 8, 4, 8, 9, 6, 2, 7, 8, 3, 8, 1, 3, 4, 7, 6, 5, 6, 2, 5, 1,
-    7, 3, 4, 7, 2, 3, 4, 7, 5, 9, 7, 6, 8, 0, 7, 0, 9, 4, 4, 1, 1, 9, 2, 4, 4,
-    8, 1, 3, 9, 1, 9, 0, 6, 7, 3, 8, 2, 8, 1, 2, 5, 8, 6, 7, 3, 6, 1, 7, 3, 7,
-    9, 8, 8, 4, 0, 3, 5, 4, 7, 2, 0, 5, 9, 6, 2, 2, 4, 0, 6, 9, 5, 9, 5, 3, 3,
-    6, 9, 1, 4, 0, 6, 2, 5,
-};
-
-// wuffs_base__private_implementation__high_prec_dec__lshift_num_new_digits
-// returns the number of additional decimal digits when left-shifting by shift.
-//
-// See below for preconditions.
-static uint32_t  //
-wuffs_base__private_implementation__high_prec_dec__lshift_num_new_digits(
-    wuffs_base__private_implementation__high_prec_dec* h,
-    uint32_t shift) {
-  // Masking with 0x3F should be unnecessary (assuming the preconditions) but
-  // it's cheap and ensures that we don't overflow the
-  // wuffs_base__private_implementation__hpd_left_shift array.
-  shift &= 63;
-
-  uint32_t x_a = wuffs_base__private_implementation__hpd_left_shift[shift];
-  uint32_t x_b = wuffs_base__private_implementation__hpd_left_shift[shift + 1];
-  uint32_t num_new_digits = x_a >> 11;
-  uint32_t pow5_a = 0x7FF & x_a;
-  uint32_t pow5_b = 0x7FF & x_b;
-
-  const uint8_t* pow5 =
-      &wuffs_base__private_implementation__powers_of_5[pow5_a];
-  uint32_t i = 0;
-  uint32_t n = pow5_b - pow5_a;
-  for (; i < n; i++) {
-    if (i >= h->num_digits) {
-      return num_new_digits - 1;
-    } else if (h->digits[i] == pow5[i]) {
-      continue;
-    } else if (h->digits[i] < pow5[i]) {
-      return num_new_digits - 1;
-    } else {
-      return num_new_digits;
-    }
-  }
-  return num_new_digits;
-}
-
-// --------
-
-// wuffs_base__private_implementation__high_prec_dec__rounded_integer returns
-// the integral (non-fractional) part of h, provided that it is 18 or fewer
-// decimal digits. For 19 or more digits, it returns UINT64_MAX. Note that:
-//   - (1 << 53) is    9007199254740992, which has 16 decimal digits.
-//   - (1 << 56) is   72057594037927936, which has 17 decimal digits.
-//   - (1 << 59) is  576460752303423488, which has 18 decimal digits.
-//   - (1 << 63) is 9223372036854775808, which has 19 decimal digits.
-// and that IEEE 754 double precision has 52 mantissa bits.
-//
-// That integral part is rounded-to-even: rounding 7.5 or 8.5 both give 8.
-//
-// h's negative bit is ignored: rounding -8.6 returns 9.
-//
-// See below for preconditions.
-static uint64_t  //
-wuffs_base__private_implementation__high_prec_dec__rounded_integer(
-    wuffs_base__private_implementation__high_prec_dec* h) {
-  if ((h->num_digits == 0) || (h->decimal_point < 0)) {
-    return 0;
-  } else if (h->decimal_point > 18) {
-    return UINT64_MAX;
-  }
-
-  uint32_t dp = (uint32_t)(h->decimal_point);
-  uint64_t n = 0;
-  uint32_t i = 0;
-  for (; i < dp; i++) {
-    n = (10 * n) + ((i < h->num_digits) ? h->digits[i] : 0);
-  }
-
-  bool round_up = false;
-  if (dp < h->num_digits) {
-    round_up = h->digits[dp] >= 5;
-    if ((h->digits[dp] == 5) && (dp + 1 == h->num_digits)) {
-      // We are exactly halfway. If we're truncated, round up, otherwise round
-      // to even.
-      round_up = h->truncated ||  //
-                 ((dp > 0) && (1 & h->digits[dp - 1]));
-    }
-  }
-  if (round_up) {
-    n++;
-  }
-
-  return n;
-}
-
-// wuffs_base__private_implementation__high_prec_dec__small_xshift shifts h's
-// number (where 'x' is 'l' or 'r' for left or right) by a small shift value.
-//
-// Preconditions:
-//  - h is non-NULL.
-//  - h->decimal_point is "not extreme".
-//  - shift is non-zero.
-//  - shift is "a small shift".
-//
-// "Not extreme" means within
-// ±WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE.
-//
-// "A small shift" means not more than
-// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL.
-//
-// wuffs_base__private_implementation__high_prec_dec__rounded_integer and
-// wuffs_base__private_implementation__high_prec_dec__lshift_num_new_digits
-// have the same preconditions.
-
-static void  //
-wuffs_base__private_implementation__high_prec_dec__small_lshift(
-    wuffs_base__private_implementation__high_prec_dec* h,
-    uint32_t shift) {
-  if (h->num_digits == 0) {
-    return;
-  }
-  uint32_t num_new_digits =
-      wuffs_base__private_implementation__high_prec_dec__lshift_num_new_digits(
-          h, shift);
-  uint32_t rx = h->num_digits - 1;                   // Read  index.
-  uint32_t wx = h->num_digits - 1 + num_new_digits;  // Write index.
-  uint64_t n = 0;
-
-  // Repeat: pick up a digit, put down a digit, right to left.
-  while (((int32_t)rx) >= 0) {
-    n += ((uint64_t)(h->digits[rx])) << shift;
-    uint64_t quo = n / 10;
-    uint64_t rem = n - (10 * quo);
-    if (wx < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
-      h->digits[wx] = (uint8_t)rem;
-    } else if (rem > 0) {
-      h->truncated = true;
-    }
-    n = quo;
-    wx--;
-    rx--;
-  }
-
-  // Put down leading digits, right to left.
-  while (n > 0) {
-    uint64_t quo = n / 10;
-    uint64_t rem = n - (10 * quo);
-    if (wx < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
-      h->digits[wx] = (uint8_t)rem;
-    } else if (rem > 0) {
-      h->truncated = true;
-    }
-    n = quo;
-    wx--;
-  }
-
-  // Finish.
-  h->num_digits += num_new_digits;
-  if (h->num_digits >
-      WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
-    h->num_digits = WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION;
-  }
-  h->decimal_point += (int32_t)num_new_digits;
-  wuffs_base__private_implementation__high_prec_dec__trim(h);
-}
-
-static void  //
-wuffs_base__private_implementation__high_prec_dec__small_rshift(
-    wuffs_base__private_implementation__high_prec_dec* h,
-    uint32_t shift) {
-  uint32_t rx = 0;  // Read  index.
-  uint32_t wx = 0;  // Write index.
-  uint64_t n = 0;
-
-  // Pick up enough leading digits to cover the first shift.
-  while ((n >> shift) == 0) {
-    if (rx < h->num_digits) {
-      // Read a digit.
-      n = (10 * n) + h->digits[rx++];
-    } else if (n == 0) {
-      // h's number used to be zero and remains zero.
-      return;
-    } else {
-      // Read sufficient implicit trailing zeroes.
-      while ((n >> shift) == 0) {
-        n = 10 * n;
-        rx++;
-      }
-      break;
-    }
-  }
-  h->decimal_point -= ((int32_t)(rx - 1));
-  if (h->decimal_point <
-      -WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {
-    // After the shift, h's number is effectively zero.
-    h->num_digits = 0;
-    h->decimal_point = 0;
-    h->negative = false;
-    h->truncated = false;
-    return;
-  }
-
-  // Repeat: pick up a digit, put down a digit, left to right.
-  uint64_t mask = (((uint64_t)(1)) << shift) - 1;
-  while (rx < h->num_digits) {
-    uint8_t new_digit = ((uint8_t)(n >> shift));
-    n = (10 * (n & mask)) + h->digits[rx++];
-    h->digits[wx++] = new_digit;
-  }
-
-  // Put down trailing digits, left to right.
-  while (n > 0) {
-    uint8_t new_digit = ((uint8_t)(n >> shift));
-    n = 10 * (n & mask);
-    if (wx < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
-      h->digits[wx++] = new_digit;
-    } else if (new_digit > 0) {
-      h->truncated = true;
-    }
-  }
-
-  // Finish.
-  h->num_digits = wx;
-  wuffs_base__private_implementation__high_prec_dec__trim(h);
-}
-
-// --------
-
-// The wuffs_base__private_implementation__etc_powers_of_10 tables were printed
-// by script/print-mpb-powers-of-10.go. That script has an optional -comments
-// flag, whose output is not copied here, which prints further detail.
-//
-// These tables are used in
-// wuffs_base__private_implementation__medium_prec_bin__assign_from_hpd.
-
-// wuffs_base__private_implementation__big_powers_of_10 contains approximations
-// to the powers of 10, ranging from 1e-348 to 1e+340, with the exponent
-// stepping by 8: -348, -340, -332, ..., -12, -4, +4, +12, ..., +340. Each step
-// consists of three uint32_t elements. There are 87 triples, 87 * 3 = 261.
-//
-// For example, the third approximation, for 1e-332, consists of the uint32_t
-// triple (0x3055AC76, 0x8B16FB20, 0xFFFFFB72). The first two of that triple
-// are a little-endian uint64_t value: 0x8B16FB203055AC76. The last one is an
-// int32_t value: -1166. Together, they represent the approximation:
-//   1e-332 ≈ 0x8B16FB203055AC76 * (2 ** -1166)
-// Similarly, the (0x00000000, 0x9C400000, 0xFFFFFFCE) uint32_t triple means:
-//   1e+4   ≈ 0x9C40000000000000 * (2 **   -50)  // This approx'n is exact.
-// Similarly, the (0xD4C4FB27, 0xED63A231, 0x000000A2) uint32_t triple means:
-//   1e+68  ≈ 0xED63A231D4C4FB27 * (2 **   162)
-static const uint32_t
-    wuffs_base__private_implementation__big_powers_of_10[261] = {
-        0x081C0288, 0xFA8FD5A0, 0xFFFFFB3C, 0xA23EBF76, 0xBAAEE17F, 0xFFFFFB57,
-        0x3055AC76, 0x8B16FB20, 0xFFFFFB72, 0x5DCE35EA, 0xCF42894A, 0xFFFFFB8C,
-        0x55653B2D, 0x9A6BB0AA, 0xFFFFFBA7, 0x3D1A45DF, 0xE61ACF03, 0xFFFFFBC1,
-        0xC79AC6CA, 0xAB70FE17, 0xFFFFFBDC, 0xBEBCDC4F, 0xFF77B1FC, 0xFFFFFBF6,
-        0x416BD60C, 0xBE5691EF, 0xFFFFFC11, 0x907FFC3C, 0x8DD01FAD, 0xFFFFFC2C,
-        0x31559A83, 0xD3515C28, 0xFFFFFC46, 0xADA6C9B5, 0x9D71AC8F, 0xFFFFFC61,
-        0x23EE8BCB, 0xEA9C2277, 0xFFFFFC7B, 0x4078536D, 0xAECC4991, 0xFFFFFC96,
-        0x5DB6CE57, 0x823C1279, 0xFFFFFCB1, 0x4DFB5637, 0xC2109436, 0xFFFFFCCB,
-        0x3848984F, 0x9096EA6F, 0xFFFFFCE6, 0x25823AC7, 0xD77485CB, 0xFFFFFD00,
-        0x97BF97F4, 0xA086CFCD, 0xFFFFFD1B, 0x172AACE5, 0xEF340A98, 0xFFFFFD35,
-        0x2A35B28E, 0xB23867FB, 0xFFFFFD50, 0xD2C63F3B, 0x84C8D4DF, 0xFFFFFD6B,
-        0x1AD3CDBA, 0xC5DD4427, 0xFFFFFD85, 0xBB25C996, 0x936B9FCE, 0xFFFFFDA0,
-        0x7D62A584, 0xDBAC6C24, 0xFFFFFDBA, 0x0D5FDAF6, 0xA3AB6658, 0xFFFFFDD5,
-        0xDEC3F126, 0xF3E2F893, 0xFFFFFDEF, 0xAAFF80B8, 0xB5B5ADA8, 0xFFFFFE0A,
-        0x6C7C4A8B, 0x87625F05, 0xFFFFFE25, 0x34C13053, 0xC9BCFF60, 0xFFFFFE3F,
-        0x91BA2655, 0x964E858C, 0xFFFFFE5A, 0x70297EBD, 0xDFF97724, 0xFFFFFE74,
-        0xB8E5B88F, 0xA6DFBD9F, 0xFFFFFE8F, 0x88747D94, 0xF8A95FCF, 0xFFFFFEA9,
-        0x8FA89BCF, 0xB9447093, 0xFFFFFEC4, 0xBF0F156B, 0x8A08F0F8, 0xFFFFFEDF,
-        0x653131B6, 0xCDB02555, 0xFFFFFEF9, 0xD07B7FAC, 0x993FE2C6, 0xFFFFFF14,
-        0x2A2B3B06, 0xE45C10C4, 0xFFFFFF2E, 0x697392D3, 0xAA242499, 0xFFFFFF49,
-        0x8300CA0E, 0xFD87B5F2, 0xFFFFFF63, 0x92111AEB, 0xBCE50864, 0xFFFFFF7E,
-        0x6F5088CC, 0x8CBCCC09, 0xFFFFFF99, 0xE219652C, 0xD1B71758, 0xFFFFFFB3,
-        0x00000000, 0x9C400000, 0xFFFFFFCE, 0x00000000, 0xE8D4A510, 0xFFFFFFE8,
-        0xAC620000, 0xAD78EBC5, 0x00000003, 0xF8940984, 0x813F3978, 0x0000001E,
-        0xC90715B3, 0xC097CE7B, 0x00000038, 0x7BEA5C70, 0x8F7E32CE, 0x00000053,
-        0xABE98068, 0xD5D238A4, 0x0000006D, 0x179A2245, 0x9F4F2726, 0x00000088,
-        0xD4C4FB27, 0xED63A231, 0x000000A2, 0x8CC8ADA8, 0xB0DE6538, 0x000000BD,
-        0x1AAB65DB, 0x83C7088E, 0x000000D8, 0x42711D9A, 0xC45D1DF9, 0x000000F2,
-        0xA61BE758, 0x924D692C, 0x0000010D, 0x1A708DEA, 0xDA01EE64, 0x00000127,
-        0x9AEF774A, 0xA26DA399, 0x00000142, 0xB47D6B85, 0xF209787B, 0x0000015C,
-        0x79DD1877, 0xB454E4A1, 0x00000177, 0x5B9BC5C2, 0x865B8692, 0x00000192,
-        0xC8965D3D, 0xC83553C5, 0x000001AC, 0xFA97A0B3, 0x952AB45C, 0x000001C7,
-        0x99A05FE3, 0xDE469FBD, 0x000001E1, 0xDB398C25, 0xA59BC234, 0x000001FC,
-        0xA3989F5C, 0xF6C69A72, 0x00000216, 0x54E9BECE, 0xB7DCBF53, 0x00000231,
-        0xF22241E2, 0x88FCF317, 0x0000024C, 0xD35C78A5, 0xCC20CE9B, 0x00000266,
-        0x7B2153DF, 0x98165AF3, 0x00000281, 0x971F303A, 0xE2A0B5DC, 0x0000029B,
-        0x5CE3B396, 0xA8D9D153, 0x000002B6, 0xA4A7443C, 0xFB9B7CD9, 0x000002D0,
-        0xA7A44410, 0xBB764C4C, 0x000002EB, 0xB6409C1A, 0x8BAB8EEF, 0x00000306,
-        0xA657842C, 0xD01FEF10, 0x00000320, 0xE9913129, 0x9B10A4E5, 0x0000033B,
-        0xA19C0C9D, 0xE7109BFB, 0x00000355, 0x623BF429, 0xAC2820D9, 0x00000370,
-        0x7AA7CF85, 0x80444B5E, 0x0000038B, 0x03ACDD2D, 0xBF21E440, 0x000003A5,
-        0x5E44FF8F, 0x8E679C2F, 0x000003C0, 0x9C8CB841, 0xD433179D, 0x000003DA,
-        0xB4E31BA9, 0x9E19DB92, 0x000003F5, 0xBADF77D9, 0xEB96BF6E, 0x0000040F,
-        0x9BF0EE6B, 0xAF87023B, 0x0000042A,
-};
-
-// wuffs_base__private_implementation__small_powers_of_10 contains
-// approximations to the powers of 10, ranging from 1e+0 to 1e+7, with the
-// exponent stepping by 1. Each step consists of three uint32_t elements.
-//
-// For example, the third approximation, for 1e+2, consists of the uint32_t
-// triple (0x00000000, 0xC8000000, 0xFFFFFFC7). The first two of that triple
-// are a little-endian uint64_t value: 0xC800000000000000. The last one is an
-// int32_t value: -57. Together, they represent the approximation:
-//   1e+2   ≈ 0xC800000000000000 * (2 **   -57)  // This approx'n is exact.
-// Similarly, the (0x00000000, 0x9C400000, 0xFFFFFFCE) uint32_t triple means:
-//   1e+4   ≈ 0x9C40000000000000 * (2 **   -50)  // This approx'n is exact.
-static const uint32_t
-    wuffs_base__private_implementation__small_powers_of_10[24] = {
-        0x00000000, 0x80000000, 0xFFFFFFC1, 0x00000000, 0xA0000000, 0xFFFFFFC4,
-        0x00000000, 0xC8000000, 0xFFFFFFC7, 0x00000000, 0xFA000000, 0xFFFFFFCA,
-        0x00000000, 0x9C400000, 0xFFFFFFCE, 0x00000000, 0xC3500000, 0xFFFFFFD1,
-        0x00000000, 0xF4240000, 0xFFFFFFD4, 0x00000000, 0x98968000, 0xFFFFFFD8,
-};
-
-// wuffs_base__private_implementation__f64_powers_of_10 holds powers of 10 that
-// can be exactly represented by a float64 (what C calls a double).
-static const double wuffs_base__private_implementation__f64_powers_of_10[23] = {
-    1e0,  1e1,  1e2,  1e3,  1e4,  1e5,  1e6,  1e7,  1e8,  1e9,  1e10, 1e11,
-    1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22,
-};
-
-// --------
-
-// wuffs_base__private_implementation__medium_prec_bin (abbreviated as MPB) is
-// a fixed precision floating point binary number. Unlike IEEE 754 Floating
-// Point, it cannot represent infinity or NaN (Not a Number).
-//
-// "Medium precision" means that the mantissa holds 64 binary digits, a little
-// more than "double precision", and sizeof(MPB) > sizeof(double). 64 is
-// obviously the number of bits in a uint64_t.
-//
-// An MPB isn't for general purpose arithmetic, only for conversions to and
-// from IEEE 754 double-precision floating point.
-//
-// There is no implicit mantissa bit. The mantissa field is zero if and only if
-// the overall floating point value is ±0. An MPB is normalized if the mantissa
-// is zero or its high bit (the 1<<63 bit) is set.
-//
-// There is no negative bit. An MPB can only represent non-negative numbers.
-//
-// The "all fields are zero" value is valid, and represents the number +0.
-//
-// This is the "Do It Yourself Floating Point" data structure from Loitsch,
-// "Printing Floating-Point Numbers Quickly and Accurately with Integers"
-// (https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf).
-//
-// Florian Loitsch is also the primary contributor to
-// https://github.com/google/double-conversion
-typedef struct {
-  uint64_t mantissa;
-  int32_t exp2;
-} wuffs_base__private_implementation__medium_prec_bin;
-
-static uint32_t  //
-wuffs_base__private_implementation__medium_prec_bin__normalize(
-    wuffs_base__private_implementation__medium_prec_bin* m) {
-  if (m->mantissa == 0) {
-    return 0;
-  }
-  uint32_t shift = wuffs_base__count_leading_zeroes_u64(m->mantissa);
-  m->mantissa <<= shift;
-  m->exp2 -= (int32_t)shift;
-  return shift;
-}
-
-// wuffs_base__private_implementation__medium_prec_bin__mul_pow_10 sets m to be
-// (m * pow), where pow comes from an etc_powers_of_10 triple starting at p.
-//
-// The result is rounded, but not necessarily normalized.
-//
-// Preconditions:
-//  - m is non-NULL.
-//  - m->mantissa is non-zero.
-//  - m->mantissa's high bit is set (i.e. m is normalized).
-//
-// The etc_powers_of_10 triple is already normalized.
-static void  //
-wuffs_base__private_implementation__medium_prec_bin__mul_pow_10(
-    wuffs_base__private_implementation__medium_prec_bin* m,
-    const uint32_t* p) {
-  uint64_t p_mantissa = ((uint64_t)p[0]) | (((uint64_t)p[1]) << 32);
-  int32_t p_exp2 = (int32_t)p[2];
-
-  wuffs_base__multiply_u64__output o =
-      wuffs_base__multiply_u64(m->mantissa, p_mantissa);
-  // Round the mantissa up. It cannot overflow because the maximum possible
-  // value of o.hi is 0xFFFFFFFFFFFFFFFE.
-  m->mantissa = o.hi + (o.lo >> 63);
-  m->exp2 = m->exp2 + p_exp2 + 64;
-}
-
-// wuffs_base__private_implementation__medium_prec_bin__as_f64 converts m to a
-// double (what C calls a double-precision float64).
-//
-// Preconditions:
-//  - m is non-NULL.
-//  - m->mantissa is non-zero.
-//  - m->mantissa's high bit is set (i.e. m is normalized).
-static double  //
-wuffs_base__private_implementation__medium_prec_bin__as_f64(
-    const wuffs_base__private_implementation__medium_prec_bin* m,
-    bool negative) {
-  uint64_t mantissa64 = m->mantissa;
-  // An mpb's mantissa has the implicit (binary) decimal point at the right
-  // hand end of the mantissa's explicit digits. A double-precision's mantissa
-  // has that decimal point near the left hand end. There's also an explicit
-  // versus implicit leading 1 bit (binary digit). Together, the difference in
-  // semantics corresponds to adding 63.
-  int32_t exp2 = m->exp2 + 63;
-
-  // Ensure that exp2 is at least -1022, the minimum double-precision exponent
-  // for normal (as opposed to subnormal) numbers.
-  if (-1022 > exp2) {
-    uint32_t n = (uint32_t)(-1022 - exp2);
-    mantissa64 >>= n;
-    exp2 += (int32_t)n;
-  }
-
-  // Extract the (1 + 52) bits from the 64-bit mantissa64. 52 is the number of
-  // explicit mantissa bits in a double-precision f64.
-  //
-  // Before, we have 64 bits and due to normalization, the high bit 'H' is 1.
-  // 63        55        47       etc     15        7
-  // H210_9876_5432_1098_7654_etc_etc_etc_5432_1098_7654_3210
-  // ++++_++++_++++_++++_++++_etc_etc_etc_++++_+..._...._....  Kept bits.
-  // ...._...._...H_2109_8765_etc_etc_etc_6543_2109_8765_4321  After shifting.
-  // After, we have 53 bits (and bit #52 is this 'H' bit).
-  uint64_t mantissa53 = mantissa64 >> 11;
-
-  // Round up if the old bit #10 (the highest bit dropped by shifting) was set.
-  // We also fix any overflow from rounding up.
-  if (mantissa64 & 1024) {
-    mantissa53++;
-    if ((mantissa53 >> 53) != 0) {
-      mantissa53 >>= 1;
-      exp2++;
-    }
-  }
-
-  // Handle double-precision infinity (a nominal exponent of 1024) and
-  // subnormals (an exponent of -1023 and no implicit mantissa bit, bit #52).
-  if (exp2 >= 1024) {
-    mantissa53 = 0;
-    exp2 = 1024;
-  } else if ((mantissa53 >> 52) == 0) {
-    exp2 = -1023;
-  }
-
-  // Pack the bits and return.
-  const int32_t f64_bias = -1023;
-  uint64_t exp2_bits =
-      (uint64_t)((exp2 - f64_bias) & 0x07FF);           // (1 << 11) - 1.
-  uint64_t bits = (mantissa53 & 0x000FFFFFFFFFFFFF) |   // (1 << 52) - 1.
-                  (exp2_bits << 52) |                   //
-                  (negative ? 0x8000000000000000 : 0);  // (1 << 63).
-  return wuffs_base__ieee_754_bit_representation__to_f64(bits);
-}
-
-// wuffs_base__private_implementation__medium_prec_bin__parse_number_f64
-// converts from an HPD to a double, using an MPB as scratch space. It returns
-// a NULL status.repr if there is no ambiguity in the truncation or rounding to
-// a float64 (an IEEE 754 double-precision floating point value).
-//
-// It may modify m even if it returns a non-NULL status.repr.
-static wuffs_base__result_f64  //
-wuffs_base__private_implementation__medium_prec_bin__parse_number_f64(
-    wuffs_base__private_implementation__medium_prec_bin* m,
-    const wuffs_base__private_implementation__high_prec_dec* h,
-    bool skip_fast_path_for_tests) {
-  do {
-    // m->mantissa is a uint64_t, which is an integer approximation to a
-    // rational value - h's underlying digits after m's normalization. This
-    // error is an upper bound on the difference between the approximate and
-    // actual value.
-    //
-    // The DiyFpStrtod function in https://github.com/google/double-conversion
-    // uses a finer grain (1/8th of the ULP, Unit in the Last Place) when
-    // tracking error. This implementation is coarser (1 ULP) but simpler.
-    //
-    // It is an error in the "numerical approximation" sense, not in the
-    // typical programming sense (as in "bad input" or "a result type").
-    uint64_t error = 0;
-
-    // Convert up to 19 decimal digits (in h->digits) to 64 binary digits (in
-    // m->mantissa): (1e19 < (1<<64)) and ((1<<64) < 1e20). If we have more
-    // than 19 digits, we're truncating (with error).
-    uint32_t i;
-    uint32_t i_end = h->num_digits;
-    if (i_end > 19) {
-      i_end = 19;
-      error = 1;
-    }
-    uint64_t mantissa = 0;
-    for (i = 0; i < i_end; i++) {
-      mantissa = (10 * mantissa) + h->digits[i];
-    }
-    m->mantissa = mantissa;
-    m->exp2 = 0;
-
-    // Check that exp10 lies in the (big_powers_of_10 + small_powers_of_10)
-    // range, -348 ..= +347, stepping big_powers_of_10 by 8 (which is 87
-    // triples) and small_powers_of_10 by 1 (which is 8 triples).
-    int32_t exp10 = h->decimal_point - ((int32_t)(i_end));
-    if (exp10 < -348) {
-      goto fail;
-    }
-    uint32_t bpo10 = ((uint32_t)(exp10 + 348)) / 8;
-    uint32_t spo10 = ((uint32_t)(exp10 + 348)) % 8;
-    if (bpo10 >= 87) {
-      goto fail;
-    }
-
-    // Try a fast path, if float64 math would be exact.
-    //
-    // 15 is such that 1e15 can be losslessly represented in a float64
-    // mantissa: (1e15 < (1<<53)) and ((1<<53) < 1e16).
-    //
-    // 22 is the maximum valid index for the
-    // wuffs_base__private_implementation__f64_powers_of_10 array.
-    do {
-      if (skip_fast_path_for_tests || ((mantissa >> 52) != 0)) {
-        break;
-      }
-      double d = (double)mantissa;
-
-      if (exp10 == 0) {
-        wuffs_base__result_f64 ret;
-        ret.status.repr = NULL;
-        ret.value = h->negative ? -d : +d;
-        return ret;
-
-      } else if (exp10 > 0) {
-        if (exp10 > 22) {
-          if (exp10 > (15 + 22)) {
-            break;
-          }
-          // If exp10 is in the range 23 ..= 37, try moving a few of the zeroes
-          // from the exponent to the mantissa. If we're still under 1e15, we
-          // haven't truncated any mantissa bits.
-          if (exp10 > 22) {
-            d *= wuffs_base__private_implementation__f64_powers_of_10[exp10 -
-                                                                      22];
-            exp10 = 22;
-            if (d >= 1e15) {
-              break;
-            }
-          }
-        }
-        d *= wuffs_base__private_implementation__f64_powers_of_10[exp10];
-        wuffs_base__result_f64 ret;
-        ret.status.repr = NULL;
-        ret.value = h->negative ? -d : +d;
-        return ret;
-
-      } else {  // "if (exp10 < 0)" is effectively "if (true)" here.
-        if (exp10 < -22) {
-          break;
-        }
-        d /= wuffs_base__private_implementation__f64_powers_of_10[-exp10];
-        wuffs_base__result_f64 ret;
-        ret.status.repr = NULL;
-        ret.value = h->negative ? -d : +d;
-        return ret;
-      }
-    } while (0);
-
-    // Normalize (and scale the error).
-    error <<= wuffs_base__private_implementation__medium_prec_bin__normalize(m);
-
-    // Multiplying two MPB values nominally multiplies two mantissas, call them
-    // A and B, which are integer approximations to the precise values (A+a)
-    // and (B+b) for some error terms a and b.
-    //
-    // MPB multiplication calculates (((A+a) * (B+b)) >> 64) to be ((A*B) >>
-    // 64). Shifting (truncating) and rounding introduces further error. The
-    // difference between the calculated result:
-    //  ((A*B                  ) >> 64)
-    // and the true result:
-    //  ((A*B + A*b + a*B + a*b) >> 64)   + rounding_error
-    // is:
-    //  ((      A*b + a*B + a*b) >> 64)   + rounding_error
-    // which can be re-grouped as:
-    //  ((A*b) >> 64) + ((a*(B+b)) >> 64) + rounding_error
-    //
-    // Now, let A and a be "m->mantissa" and "error", and B and b be the
-    // pre-calculated power of 10. A and B are both less than (1 << 64), a is
-    // the "error" local variable and b is less than 1.
-    //
-    // An upper bound (in absolute value) on ((A*b) >> 64) is therefore 1.
-    //
-    // An upper bound on ((a*(B+b)) >> 64) is a, also known as error.
-    //
-    // Finally, the rounding_error is at most 1.
-    //
-    // In total, calling mpb__mul_pow_10 will raise the worst-case error by 2.
-    // The subsequent re-normalization can multiply that by a further factor.
-
-    // Multiply by small_powers_of_10[etc].
-    wuffs_base__private_implementation__medium_prec_bin__mul_pow_10(
-        m, &wuffs_base__private_implementation__small_powers_of_10[3 * spo10]);
-    error += 2;
-    error <<= wuffs_base__private_implementation__medium_prec_bin__normalize(m);
-
-    // Multiply by big_powers_of_10[etc].
-    wuffs_base__private_implementation__medium_prec_bin__mul_pow_10(
-        m, &wuffs_base__private_implementation__big_powers_of_10[3 * bpo10]);
-    error += 2;
-    error <<= wuffs_base__private_implementation__medium_prec_bin__normalize(m);
-
-    // We have a good approximation of h, but we still have to check whether
-    // the error is small enough. Equivalently, whether the number of surplus
-    // mantissa bits (the bits dropped when going from m's 64 mantissa bits to
-    // the smaller number of double-precision mantissa bits) would always round
-    // up or down, even when perturbed by ±error. We start at 11 surplus bits
-    // (m has 64, double-precision has 1+52), but it can be higher for
-    // subnormals.
-    //
-    // In many cases, the error is small enough and we return true.
-    const int32_t f64_bias = -1023;
-    int32_t subnormal_exp2 = f64_bias - 63;
-    uint32_t surplus_bits = 11;
-    if (subnormal_exp2 >= m->exp2) {
-      surplus_bits += 1 + ((uint32_t)(subnormal_exp2 - m->exp2));
-    }
-
-    uint64_t surplus_mask =
-        (((uint64_t)1) << surplus_bits) - 1;  // e.g. 0x07FF.
-    uint64_t surplus = m->mantissa & surplus_mask;
-    uint64_t halfway = ((uint64_t)1) << (surplus_bits - 1);  // e.g. 0x0400.
-
-    // Do the final calculation in *signed* arithmetic.
-    int64_t i_surplus = (int64_t)surplus;
-    int64_t i_halfway = (int64_t)halfway;
-    int64_t i_error = (int64_t)error;
-
-    if ((i_surplus > (i_halfway - i_error)) &&
-        (i_surplus < (i_halfway + i_error))) {
-      goto fail;
-    }
-
-    wuffs_base__result_f64 ret;
-    ret.status.repr = NULL;
-    ret.value = wuffs_base__private_implementation__medium_prec_bin__as_f64(
-        m, h->negative);
-    return ret;
-  } while (0);
-
-fail:
-  do {
-    wuffs_base__result_f64 ret;
-    ret.status.repr = "#base: mpb__parse_number_f64 failed";
-    ret.value = 0;
-    return ret;
-  } while (0);
-}
-
-// --------
-
-wuffs_base__result_f64  //
-wuffs_base__parse_number_f64_special(wuffs_base__slice_u8 s,
-                                     const char* fallback_status_repr) {
-  do {
-    uint8_t* p = s.ptr;
-    uint8_t* q = s.ptr + s.len;
-
-    for (; (p < q) && (*p == '_'); p++) {
-    }
-    if (p >= q) {
-      goto fallback;
-    }
-
-    // Parse sign.
-    bool negative = false;
-    do {
-      if (*p == '+') {
-        p++;
-      } else if (*p == '-') {
-        negative = true;
-        p++;
-      } else {
-        break;
-      }
-      for (; (p < q) && (*p == '_'); p++) {
-      }
-    } while (0);
-    if (p >= q) {
-      goto fallback;
-    }
-
-    bool nan = false;
-    switch (p[0]) {
-      case 'I':
-      case 'i':
-        if (((q - p) < 3) ||                     //
-            ((p[1] != 'N') && (p[1] != 'n')) ||  //
-            ((p[2] != 'F') && (p[2] != 'f'))) {
-          goto fallback;
-        }
-        p += 3;
-
-        if ((p >= q) || (*p == '_')) {
-          break;
-        } else if (((q - p) < 5) ||                     //
-                   ((p[0] != 'I') && (p[0] != 'i')) ||  //
-                   ((p[1] != 'N') && (p[1] != 'n')) ||  //
-                   ((p[2] != 'I') && (p[2] != 'i')) ||  //
-                   ((p[3] != 'T') && (p[3] != 't')) ||  //
-                   ((p[4] != 'Y') && (p[4] != 'y'))) {
-          goto fallback;
-        }
-        p += 5;
-
-        if ((p >= q) || (*p == '_')) {
-          break;
-        }
-        goto fallback;
-
-      case 'N':
-      case 'n':
-        if (((q - p) < 3) ||                     //
-            ((p[1] != 'A') && (p[1] != 'a')) ||  //
-            ((p[2] != 'N') && (p[2] != 'n'))) {
-          goto fallback;
-        }
-        p += 3;
-
-        if ((p >= q) || (*p == '_')) {
-          nan = true;
-          break;
-        }
-        goto fallback;
-
-      default:
-        goto fallback;
-    }
-
-    // Finish.
-    for (; (p < q) && (*p == '_'); p++) {
-    }
-    if (p != q) {
-      goto fallback;
-    }
-    wuffs_base__result_f64 ret;
-    ret.status.repr = NULL;
-    ret.value = wuffs_base__ieee_754_bit_representation__to_f64(
-        (nan ? 0x7FFFFFFFFFFFFFFF : 0x7FF0000000000000) |
-        (negative ? 0x8000000000000000 : 0));
-    return ret;
-  } while (0);
-
-fallback:
-  do {
-    wuffs_base__result_f64 ret;
-    ret.status.repr = fallback_status_repr;
-    ret.value = 0;
-    return ret;
-  } while (0);
-}
-
-wuffs_base__result_f64  //
-wuffs_base__parse_number_f64(wuffs_base__slice_u8 s) {
-  wuffs_base__private_implementation__medium_prec_bin m;
-  wuffs_base__private_implementation__high_prec_dec h;
-
-  do {
-    // powers converts decimal powers of 10 to binary powers of 2. For example,
-    // (10000 >> 13) is 1. It stops before the elements exceed 60, also known
-    // as WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL.
-    static const uint32_t num_powers = 19;
-    static const uint8_t powers[19] = {
-        0,  3,  6,  9,  13, 16, 19, 23, 26, 29,  //
-        33, 36, 39, 43, 46, 49, 53, 56, 59,      //
-    };
-
-    wuffs_base__status status =
-        wuffs_base__private_implementation__high_prec_dec__parse(&h, s);
-    if (status.repr) {
-      return wuffs_base__parse_number_f64_special(s, status.repr);
-    }
-
-    // Handle zero and obvious extremes. The largest and smallest positive
-    // finite f64 values are approximately 1.8e+308 and 4.9e-324.
-    if ((h.num_digits == 0) || (h.decimal_point < -326)) {
-      goto zero;
-    } else if (h.decimal_point > 310) {
-      goto infinity;
-    }
-
-    wuffs_base__result_f64 mpb_result =
-        wuffs_base__private_implementation__medium_prec_bin__parse_number_f64(
-            &m, &h, false);
-    if (mpb_result.status.repr == NULL) {
-      return mpb_result;
-    }
-
-    // Scale by powers of 2 until we're in the range [½ .. 1], which gives us
-    // our exponent (in base-2). First we shift right, possibly a little too
-    // far, ending with a value certainly below 1 and possibly below ½...
-    const int32_t f64_bias = -1023;
-    int32_t exp2 = 0;
-    while (h.decimal_point > 0) {
-      uint32_t n = (uint32_t)(+h.decimal_point);
-      uint32_t shift =
-          (n < num_powers)
-              ? powers[n]
-              : WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL;
-
-      wuffs_base__private_implementation__high_prec_dec__small_rshift(&h,
-                                                                      shift);
-      if (h.decimal_point <
-          -WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {
-        goto zero;
-      }
-      exp2 += (int32_t)shift;
-    }
-    // ...then we shift left, putting us in [½ .. 1].
-    while (h.decimal_point <= 0) {
-      uint32_t shift;
-      if (h.decimal_point == 0) {
-        if (h.digits[0] >= 5) {
-          break;
-        }
-        shift = (h.digits[0] <= 2) ? 2 : 1;
-      } else {
-        uint32_t n = (uint32_t)(-h.decimal_point);
-        shift = (n < num_powers)
-                    ? powers[n]
-                    : WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL;
-      }
-
-      wuffs_base__private_implementation__high_prec_dec__small_lshift(&h,
-                                                                      shift);
-      if (h.decimal_point >
-          +WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {
-        goto infinity;
-      }
-      exp2 -= (int32_t)shift;
-    }
-
-    // We're in the range [½ .. 1] but f64 uses [1 .. 2].
-    exp2--;
-
-    // The minimum normal exponent is (f64_bias + 1).
-    while ((f64_bias + 1) > exp2) {
-      uint32_t n = (uint32_t)((f64_bias + 1) - exp2);
-      if (n > WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL) {
-        n = WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL;
-      }
-      wuffs_base__private_implementation__high_prec_dec__small_rshift(&h, n);
-      exp2 += (int32_t)n;
-    }
-
-    // Check for overflow.
-    if ((exp2 - f64_bias) >= 0x07FF) {  // (1 << 11) - 1.
-      goto infinity;
-    }
-
-    // Extract 53 bits for the mantissa (in base-2).
-    wuffs_base__private_implementation__high_prec_dec__small_lshift(&h, 53);
-    uint64_t man2 =
-        wuffs_base__private_implementation__high_prec_dec__rounded_integer(&h);
-
-    // Rounding might have added one bit. If so, shift and re-check overflow.
-    if ((man2 >> 53) != 0) {
-      man2 >>= 1;
-      exp2++;
-      if ((exp2 - f64_bias) >= 0x07FF) {  // (1 << 11) - 1.
-        goto infinity;
-      }
-    }
-
-    // Handle subnormal numbers.
-    if ((man2 >> 52) == 0) {
-      exp2 = f64_bias;
-    }
-
-    // Pack the bits and return.
-    uint64_t exp2_bits =
-        (uint64_t)((exp2 - f64_bias) & 0x07FF);             // (1 << 11) - 1.
-    uint64_t bits = (man2 & 0x000FFFFFFFFFFFFF) |           // (1 << 52) - 1.
-                    (exp2_bits << 52) |                     //
-                    (h.negative ? 0x8000000000000000 : 0);  // (1 << 63).
-
-    wuffs_base__result_f64 ret;
-    ret.status.repr = NULL;
-    ret.value = wuffs_base__ieee_754_bit_representation__to_f64(bits);
-    return ret;
-  } while (0);
-
-zero:
-  do {
-    uint64_t bits = h.negative ? 0x8000000000000000 : 0;
-
-    wuffs_base__result_f64 ret;
-    ret.status.repr = NULL;
-    ret.value = wuffs_base__ieee_754_bit_representation__to_f64(bits);
-    return ret;
-  } while (0);
-
-infinity:
-  do {
-    uint64_t bits = h.negative ? 0xFFF0000000000000 : 0x7FF0000000000000;
-
-    wuffs_base__result_f64 ret;
-    ret.status.repr = NULL;
-    ret.value = wuffs_base__ieee_754_bit_representation__to_f64(bits);
-    return ret;
-  } while (0);
-}
-
 // ---------------- Hexadecimal
 
 size_t  //
diff --git a/internal/cgen/cgen.go b/internal/cgen/cgen.go
index 0a04649..7f6491d 100644
--- a/internal/cgen/cgen.go
+++ b/internal/cgen/cgen.go
@@ -116,13 +116,15 @@
 			}
 			buf := make(buffer, 0, 128*1024)
 			if err := expandBangBangInsert(&buf, baseAllImplC, map[string]func(*buffer) error{
-				"// !! INSERT InterfaceDeclarations.\n": insertInterfaceDeclarations,
-				"// !! INSERT InterfaceDefinitions.\n":  insertInterfaceDefinitions,
-				"// !! INSERT base/all-private.h.\n":    insertBaseAllPrivateH,
-				"// !! INSERT base/all-public.h.\n":     insertBaseAllPublicH,
-				"// !! INSERT base/copyright\n":         insertBaseCopyright,
-				"// !! INSERT base/image-impl.c.\n":     insertBaseImageImplC,
-				"// !! INSERT base/strconv-impl.c.\n":   insertBaseStrConvImplC,
+				"// !! INSERT InterfaceDeclarations.\n":    insertInterfaceDeclarations,
+				"// !! INSERT InterfaceDefinitions.\n":     insertInterfaceDefinitions,
+				"// !! INSERT base/all-private.h.\n":       insertBaseAllPrivateH,
+				"// !! INSERT base/all-public.h.\n":        insertBaseAllPublicH,
+				"// !! INSERT base/copyright\n":            insertBaseCopyright,
+				"// !! INSERT base/f64conv-submodule.c.\n": insertBaseF64ConvSubmoduleC,
+				"// !! INSERT base/image-impl.c.\n":        insertBaseImageImplC,
+				"// !! INSERT base/pixconv-submodule.c.\n": insertBasePixConvSubmoduleC,
+				"// !! INSERT base/strconv-impl.c.\n":      insertBaseStrConvImplC,
 				"// !! INSERT wuffs_base__status strings.\n": func(b *buffer) error {
 					for _, z := range builtin.Statuses {
 						msg, _ := t.Unescape(z)
@@ -327,12 +329,24 @@
 	return nil
 }
 
+func insertBaseF64ConvSubmoduleC(buf *buffer) error {
+	buf.writes(baseF64ConvSubmoduleC)
+	buf.writeb('\n')
+	return nil
+}
+
 func insertBaseImageImplC(buf *buffer) error {
 	buf.writes(baseImageImplC)
 	buf.writeb('\n')
 	return nil
 }
 
+func insertBasePixConvSubmoduleC(buf *buffer) error {
+	buf.writes(basePixConvSubmoduleC)
+	buf.writeb('\n')
+	return nil
+}
+
 func insertBaseStrConvImplC(buf *buffer) error {
 	buf.writes(baseStrConvImplC)
 	buf.writeb('\n')
diff --git a/internal/cgen/data.go b/internal/cgen/data.go
index 4f5bce9..9d08a41 100644
--- a/internal/cgen/data.go
+++ b/internal/cgen/data.go
@@ -23,20 +23,14 @@
 	"" +
 	"// ----------------\n\n#if !defined(WUFFS_CONFIG__MODULES) || defined(WUFFS_CONFIG__MODULE__BASE)\n\nconst uint8_t wuffs_base__low_bits_mask__u8[9] = {\n    0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F, 0xFF,\n};\n\nconst uint16_t wuffs_base__low_bits_mask__u16[17] = {\n    0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF,\n    0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF,\n};\n\nconst uint32_t wuffs_base__low_bits_mask__u32[33] = {\n    0x00000000, 0x00000001, 0x00000003, 0x00000007, 0x0000000F, 0x0000001F,\n    0x0000003F, 0x0000007F, 0x000000FF, 0x000001FF, 0x000003FF, 0x000007FF,\n    0x00000FFF, 0x00001FFF, 0x00003FFF, 0x00007FFF, 0x0000FFFF, 0x0001FFFF,\n    0x0003FFFF, 0x0007FFFF, 0x000FFFFF, 0x001FFFFF, 0x003FFFFF, 0x007FFFFF,\n    0x00FFFFFF, 0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, 0x0FFFFFFF, 0x1FFFFFFF,\n    0x3FFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF,\n};\n\nconst uint64_t wuffs_base__low_bits_mask__u64[65] = {\n    0x0000000000000000, 0x0000000000000001, 0x0000000000000003,\n    0x0000000000" +
 	"000007, 0x000000000000000F, 0x000000000000001F,\n    0x000000000000003F, 0x000000000000007F, 0x00000000000000FF,\n    0x00000000000001FF, 0x00000000000003FF, 0x00000000000007FF,\n    0x0000000000000FFF, 0x0000000000001FFF, 0x0000000000003FFF,\n    0x0000000000007FFF, 0x000000000000FFFF, 0x000000000001FFFF,\n    0x000000000003FFFF, 0x000000000007FFFF, 0x00000000000FFFFF,\n    0x00000000001FFFFF, 0x00000000003FFFFF, 0x00000000007FFFFF,\n    0x0000000000FFFFFF, 0x0000000001FFFFFF, 0x0000000003FFFFFF,\n    0x0000000007FFFFFF, 0x000000000FFFFFFF, 0x000000001FFFFFFF,\n    0x000000003FFFFFFF, 0x000000007FFFFFFF, 0x00000000FFFFFFFF,\n    0x00000001FFFFFFFF, 0x00000003FFFFFFFF, 0x00000007FFFFFFFF,\n    0x0000000FFFFFFFFF, 0x0000001FFFFFFFFF, 0x0000003FFFFFFFFF,\n    0x0000007FFFFFFFFF, 0x000000FFFFFFFFFF, 0x000001FFFFFFFFFF,\n    0x000003FFFFFFFFFF, 0x000007FFFFFFFFFF, 0x00000FFFFFFFFFFF,\n    0x00001FFFFFFFFFFF, 0x00003FFFFFFFFFFF, 0x00007FFFFFFFFFFF,\n    0x0000FFFFFFFFFFFF, 0x0001FFFFFFFFFFFF, 0x0003FFFFFFFFFFFF,\n    0x0007FFFFFF" +
-	"FFFFFF, 0x000FFFFFFFFFFFFF, 0x001FFFFFFFFFFFFF,\n    0x003FFFFFFFFFFFFF, 0x007FFFFFFFFFFFFF, 0x00FFFFFFFFFFFFFF,\n    0x01FFFFFFFFFFFFFF, 0x03FFFFFFFFFFFFFF, 0x07FFFFFFFFFFFFFF,\n    0x0FFFFFFFFFFFFFFF, 0x1FFFFFFFFFFFFFFF, 0x3FFFFFFFFFFFFFFF,\n    0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF,\n};\n\n// !! INSERT wuffs_base__status strings.\n\n// !! INSERT InterfaceDefinitions.\n\n// !! INSERT base/image-impl.c.\n\n// !! INSERT base/strconv-impl.c.\n\n#endif  // !defined(WUFFS_CONFIG__MODULES) ||\n        // defined(WUFFS_CONFIG__MODULE__BASE)\n\n#ifdef __cplusplus\n}  // extern \"C\"\n#endif\n\n#endif  // WUFFS_IMPLEMENTATION\n\n// !! WUFFS MONOLITHIC RELEASE DISCARDS EVERYTHING BELOW.\n\n#endif  // WUFFS_INCLUDE_GUARD__BASE\n" +
+	"FFFFFF, 0x000FFFFFFFFFFFFF, 0x001FFFFFFFFFFFFF,\n    0x003FFFFFFFFFFFFF, 0x007FFFFFFFFFFFFF, 0x00FFFFFFFFFFFFFF,\n    0x01FFFFFFFFFFFFFF, 0x03FFFFFFFFFFFFFF, 0x07FFFFFFFFFFFFFF,\n    0x0FFFFFFFFFFFFFFF, 0x1FFFFFFFFFFFFFFF, 0x3FFFFFFFFFFFFFFF,\n    0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF,\n};\n\n// !! INSERT wuffs_base__status strings.\n\n// !! INSERT InterfaceDefinitions.\n\n// !! INSERT base/image-impl.c.\n\n// !! INSERT base/strconv-impl.c.\n\n#endif  // !defined(WUFFS_CONFIG__MODULES) ||\n        // defined(WUFFS_CONFIG__MODULE__BASE)\n\n#if !defined(WUFFS_CONFIG__MODULES) || defined(WUFFS_CONFIG__MODULE__BASE)\n\n// !! INSERT base/f64conv-submodule.c.\n\n#endif  // !defined(WUFFS_CONFIG__MODULES) ||\n        // defined(WUFFS_CONFIG__MODULE__BASE)\n\n#if !defined(WUFFS_CONFIG__MODULES) || defined(WUFFS_CONFIG__MODULE__BASE)\n\n// !! INSERT base/pixconv-submodule.c.\n\n#endif  // !defined(WUFFS_CONFIG__MODULES) ||\n        // defined(WUFFS_CONFIG__MODULE__BASE)\n\n#ifdef __cplusplus\n}  // extern \"C\"\n#endif\n\n#endif  // WUFFS_IMPLEMENTATION\n" +
+	"\n// !! WUFFS MONOLITHIC RELEASE DISCARDS EVERYTHING BELOW.\n\n#endif  // WUFFS_INCLUDE_GUARD__BASE\n" +
 	""
 
 const baseImageImplC = "" +
 	"// ---------------- Images\n\nconst uint32_t wuffs_base__pixel_format__bits_per_channel[16] = {\n    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,\n    0x08, 0x0A, 0x0C, 0x10, 0x18, 0x20, 0x30, 0x40,\n};\n\n" +
 	"" +
-	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__squash_bgr_565_888(wuffs_base__slice_u8 dst,\n                                               wuffs_base__slice_u8 src) {\n  size_t len4 = (dst.len < src.len ? dst.len : src.len) / 4;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n\n  size_t n = len4;\n  while (n--) {\n    uint32_t argb = wuffs_base__load_u32le__no_bounds_check(s);\n    uint32_t b5 = 0x1F & (argb >> (8 - 5));\n    uint32_t g6 = 0x3F & (argb >> (16 - 6));\n    uint32_t r5 = 0x1F & (argb >> (24 - 5));\n    uint32_t alpha = argb & 0xFF000000;\n    wuffs_base__store_u32le__no_bounds_check(\n        d, alpha | (r5 << 11) | (g6 << 5) | (b5 << 0));\n    s += 4;\n    d += 4;\n  }\n  return len4 * 4;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__swap_rgbx_bgrx(wuffs_base__slice_u8 dst,\n                                           wuffs_base__slice_u8 src) {\n  size_t len4 = (dst.len < src.len ? dst.len : src.len) / 4;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n\n  size_t n = len4;\n  while (n--) {\n   " +
-	" uint8_t b0 = s[0];\n    uint8_t b1 = s[1];\n    uint8_t b2 = s[2];\n    uint8_t b3 = s[3];\n    d[0] = b2;\n    d[1] = b1;\n    d[2] = b0;\n    d[3] = b3;\n    s += 4;\n    d += 4;\n  }\n  return len4 * 4;\n}\n\nstatic inline uint32_t  //\nwuffs_base__swap_u32_argb_abgr(uint32_t u) {\n  uint32_t o = u & 0xFF00FF00;\n  uint32_t r = u & 0x00FF0000;\n  uint32_t b = u & 0x000000FF;\n  return o | (r >> 16) | (b << 16);\n}\n\n" +
-	"" +
-	"// --------\n\nstatic inline uint32_t  //\nwuffs_base__composite_nonpremul_nonpremul_u32_axxx(uint32_t dst_nonpremul,\n                                                   uint32_t src_nonpremul) {\n  // Convert from 8-bit color to 16-bit color.\n  uint32_t sa = 0x101 * (0xFF & (src_nonpremul >> 24));\n  uint32_t sr = 0x101 * (0xFF & (src_nonpremul >> 16));\n  uint32_t sg = 0x101 * (0xFF & (src_nonpremul >> 8));\n  uint32_t sb = 0x101 * (0xFF & (src_nonpremul >> 0));\n  uint32_t da = 0x101 * (0xFF & (dst_nonpremul >> 24));\n  uint32_t dr = 0x101 * (0xFF & (dst_nonpremul >> 16));\n  uint32_t dg = 0x101 * (0xFF & (dst_nonpremul >> 8));\n  uint32_t db = 0x101 * (0xFF & (dst_nonpremul >> 0));\n\n  // Convert dst from nonpremul to premul.\n  dr = (dr * da) / 0xFFFF;\n  dg = (dg * da) / 0xFFFF;\n  db = (db * da) / 0xFFFF;\n\n  // Calculate the inverse of the src-alpha: how much of the dst to keep.\n  uint32_t ia = 0xFFFF - sa;\n\n  // Composite src (nonpremul) over dst (premul).\n  da = sa + ((da * ia) / 0xFFFF);\n  dr = ((sr * sa) + (dr * i" +
-	"a)) / 0xFFFF;\n  dg = ((sg * sa) + (dg * ia)) / 0xFFFF;\n  db = ((sb * sa) + (db * ia)) / 0xFFFF;\n\n  // Convert dst from premul to nonpremul.\n  if (da != 0) {\n    dr = (dr * 0xFFFF) / da;\n    dg = (dg * 0xFFFF) / da;\n    db = (db * 0xFFFF) / da;\n  }\n\n  // Convert from 16-bit color to 8-bit color and combine the components.\n  da >>= 8;\n  dr >>= 8;\n  dg >>= 8;\n  db >>= 8;\n  return (db << 0) | (dg << 8) | (dr << 16) | (da << 24);\n}\n\nstatic inline uint32_t  //\nwuffs_base__composite_nonpremul_premul_u32_axxx(uint32_t dst_nonpremul,\n                                                uint32_t src_premul) {\n  // Convert from 8-bit color to 16-bit color.\n  uint32_t sa = 0x101 * (0xFF & (src_premul >> 24));\n  uint32_t sr = 0x101 * (0xFF & (src_premul >> 16));\n  uint32_t sg = 0x101 * (0xFF & (src_premul >> 8));\n  uint32_t sb = 0x101 * (0xFF & (src_premul >> 0));\n  uint32_t da = 0x101 * (0xFF & (dst_nonpremul >> 24));\n  uint32_t dr = 0x101 * (0xFF & (dst_nonpremul >> 16));\n  uint32_t dg = 0x101 * (0xFF & (dst_nonpremul >> 8))" +
-	";\n  uint32_t db = 0x101 * (0xFF & (dst_nonpremul >> 0));\n\n  // Convert dst from nonpremul to premul.\n  dr = (dr * da) / 0xFFFF;\n  dg = (dg * da) / 0xFFFF;\n  db = (db * da) / 0xFFFF;\n\n  // Calculate the inverse of the src-alpha: how much of the dst to keep.\n  uint32_t ia = 0xFFFF - sa;\n\n  // Composite src (premul) over dst (premul).\n  da = sa + ((da * ia) / 0xFFFF);\n  dr = sr + ((dr * ia) / 0xFFFF);\n  dg = sg + ((dg * ia) / 0xFFFF);\n  db = sb + ((db * ia) / 0xFFFF);\n\n  // Convert dst from premul to nonpremul.\n  if (da != 0) {\n    dr = (dr * 0xFFFF) / da;\n    dg = (dg * 0xFFFF) / da;\n    db = (db * 0xFFFF) / da;\n  }\n\n  // Convert from 16-bit color to 8-bit color and combine the components.\n  da >>= 8;\n  dr >>= 8;\n  dg >>= 8;\n  db >>= 8;\n  return (db << 0) | (dg << 8) | (dr << 16) | (da << 24);\n}\n\nstatic inline uint32_t  //\nwuffs_base__composite_premul_nonpremul_u32_axxx(uint32_t dst_premul,\n                                                uint32_t src_nonpremul) {\n  // Convert from 8-bit color to 16-bit color.\n " +
-	" uint32_t sa = 0x101 * (0xFF & (src_nonpremul >> 24));\n  uint32_t sr = 0x101 * (0xFF & (src_nonpremul >> 16));\n  uint32_t sg = 0x101 * (0xFF & (src_nonpremul >> 8));\n  uint32_t sb = 0x101 * (0xFF & (src_nonpremul >> 0));\n  uint32_t da = 0x101 * (0xFF & (dst_premul >> 24));\n  uint32_t dr = 0x101 * (0xFF & (dst_premul >> 16));\n  uint32_t dg = 0x101 * (0xFF & (dst_premul >> 8));\n  uint32_t db = 0x101 * (0xFF & (dst_premul >> 0));\n\n  // Calculate the inverse of the src-alpha: how much of the dst to keep.\n  uint32_t ia = 0xFFFF - sa;\n\n  // Composite src (nonpremul) over dst (premul).\n  da = sa + ((da * ia) / 0xFFFF);\n  dr = ((sr * sa) + (dr * ia)) / 0xFFFF;\n  dg = ((sg * sa) + (dg * ia)) / 0xFFFF;\n  db = ((sb * sa) + (db * ia)) / 0xFFFF;\n\n  // Convert from 16-bit color to 8-bit color and combine the components.\n  da >>= 8;\n  dr >>= 8;\n  dg >>= 8;\n  db >>= 8;\n  return (db << 0) | (dg << 8) | (dr << 16) | (da << 24);\n}\n\nstatic inline uint32_t  //\nwuffs_base__composite_premul_premul_u32_axxx(uint32_t dst_premul,\n    " +
-	"                                         uint32_t src_premul) {\n  // Convert from 8-bit color to 16-bit color.\n  uint32_t sa = 0x101 * (0xFF & (src_premul >> 24));\n  uint32_t sr = 0x101 * (0xFF & (src_premul >> 16));\n  uint32_t sg = 0x101 * (0xFF & (src_premul >> 8));\n  uint32_t sb = 0x101 * (0xFF & (src_premul >> 0));\n  uint32_t da = 0x101 * (0xFF & (dst_premul >> 24));\n  uint32_t dr = 0x101 * (0xFF & (dst_premul >> 16));\n  uint32_t dg = 0x101 * (0xFF & (dst_premul >> 8));\n  uint32_t db = 0x101 * (0xFF & (dst_premul >> 0));\n\n  // Calculate the inverse of the src-alpha: how much of the dst to keep.\n  uint32_t ia = 0xFFFF - sa;\n\n  // Composite src (premul) over dst (premul).\n  da = sa + ((da * ia) / 0xFFFF);\n  dr = sr + ((dr * ia) / 0xFFFF);\n  dg = sg + ((dg * ia) / 0xFFFF);\n  db = sb + ((db * ia) / 0xFFFF);\n\n  // Convert from 16-bit color to 8-bit color and combine the components.\n  da >>= 8;\n  dr >>= 8;\n  dg >>= 8;\n  db >>= 8;\n  return (db << 0) | (dg << 8) | (dr << 16) | (da << 24);\n}\n\n" +
+	"// --------\n\nstatic inline uint32_t  //\nwuffs_base__swap_u32_argb_abgr(uint32_t u) {\n  uint32_t o = u & 0xFF00FF00;\n  uint32_t r = u & 0x00FF0000;\n  uint32_t b = u & 0x000000FF;\n  return o | (r >> 16) | (b << 16);\n}\n\n" +
 	"" +
 	"// --------\n\nwuffs_base__color_u32_argb_premul  //\nwuffs_base__pixel_buffer__color_u32_at(const wuffs_base__pixel_buffer* pb,\n                                       uint32_t x,\n                                       uint32_t y) {\n  if (!pb || (x >= pb->pixcfg.private_impl.width) ||\n      (y >= pb->pixcfg.private_impl.height)) {\n    return 0;\n  }\n\n  if (wuffs_base__pixel_format__is_planar(&pb->pixcfg.private_impl.pixfmt)) {\n    // TODO: support planar formats.\n    return 0;\n  }\n\n  size_t stride = pb->private_impl.planes[0].stride;\n  uint8_t* row = pb->private_impl.planes[0].ptr + (stride * ((size_t)y));\n\n  switch (pb->pixcfg.private_impl.pixfmt.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n      return wuffs_base__load_u32le__no_bounds_check(row + (4 * ((size_t)x)));\n\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY: {\n      uint8_t* palette = pb->private_impl.planes[3].ptr;\n      return wu" +
 	"ffs_base__load_u32le__no_bounds_check(palette +\n                                                     (4 * ((size_t)row[x])));\n    }\n\n      // Common formats above. Rarer formats below.\n\n    case WUFFS_BASE__PIXEL_FORMAT__Y:\n      return 0xFF000000 | (0x00010101 * ((uint32_t)(row[x])));\n\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_NONPREMUL: {\n      uint8_t* palette = pb->private_impl.planes[3].ptr;\n      return wuffs_base__color_u32_argb_nonpremul__as__color_u32_argb_premul(\n          wuffs_base__load_u32le__no_bounds_check(palette +\n                                                  (4 * ((size_t)row[x]))));\n    }\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      return wuffs_base__color_u16_rgb_565__as__color_u32_argb_premul(\n          wuffs_base__load_u16le__no_bounds_check(row + (2 * ((size_t)x))));\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      return 0xFF000000 |\n             wuffs_base__load_u24le__no_bounds_check(row + (3 * ((size_t)x)));\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n      re" +
@@ -47,50 +41,7 @@
 	"gr(color));\n      break;\n\n    default:\n      // TODO: support more formats.\n      return wuffs_base__make_status(wuffs_base__error__unsupported_option);\n  }\n\n  return wuffs_base__make_status(NULL);\n}\n\n" +
 	"" +
 	"// --------\n\nuint8_t  //\nwuffs_base__pixel_palette__closest_element(\n    wuffs_base__slice_u8 palette_slice,\n    wuffs_base__pixel_format palette_format,\n    wuffs_base__color_u32_argb_premul c) {\n  size_t n = palette_slice.len / 4;\n  if (n > 256) {\n    n = 256;\n  }\n  size_t best_index = 0;\n  uint64_t best_score = 0xFFFFFFFFFFFFFFFF;\n\n  // Work in 16-bit color.\n  uint32_t ca = 0x101 * (0xFF & (c >> 24));\n  uint32_t cr = 0x101 * (0xFF & (c >> 16));\n  uint32_t cg = 0x101 * (0xFF & (c >> 8));\n  uint32_t cb = 0x101 * (0xFF & (c >> 0));\n\n  switch (palette_format.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY: {\n      bool nonpremul = palette_format.repr ==\n                       WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_NONPREMUL;\n\n      size_t i;\n      for (i = 0; i < n; i++) {\n        // Work in 16-bit color.\n        uint32_t pb = 0x101 * ((uint32_t)(palette_slice.ptr[(4 * i) + 0]))" +
-	";\n        uint32_t pg = 0x101 * ((uint32_t)(palette_slice.ptr[(4 * i) + 1]));\n        uint32_t pr = 0x101 * ((uint32_t)(palette_slice.ptr[(4 * i) + 2]));\n        uint32_t pa = 0x101 * ((uint32_t)(palette_slice.ptr[(4 * i) + 3]));\n\n        // Convert to premultiplied alpha.\n        if (nonpremul && (pa != 0xFFFF)) {\n          pb = (pb * pa) / 0xFFFF;\n          pg = (pg * pa) / 0xFFFF;\n          pr = (pr * pa) / 0xFFFF;\n        }\n\n        // These deltas are conceptually int32_t (signed) but after squaring,\n        // it's equivalent to work in uint32_t (unsigned).\n        pb -= cb;\n        pg -= cg;\n        pr -= cr;\n        pa -= ca;\n        uint64_t score = ((uint64_t)(pb * pb)) + ((uint64_t)(pg * pg)) +\n                         ((uint64_t)(pr * pr)) + ((uint64_t)(pa * pa));\n        if (best_score > score) {\n          best_score = score;\n          best_index = i;\n        }\n      }\n      break;\n    }\n  }\n\n  return (uint8_t)best_index;\n}\n\n" +
-	"" +
-	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__copy_1_1(wuffs_base__slice_u8 dst,\n                                     wuffs_base__slice_u8 dst_palette,\n                                     wuffs_base__slice_u8 src) {\n  return wuffs_base__slice_u8__copy_from_slice(dst, src);\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__copy_3_3(wuffs_base__slice_u8 dst,\n                                     wuffs_base__slice_u8 dst_palette,\n                                     wuffs_base__slice_u8 src) {\n  size_t dst_len3 = dst.len / 3;\n  size_t src_len3 = src.len / 3;\n  size_t len = dst_len3 < src_len3 ? dst_len3 : src_len3;\n  if (len > 0) {\n    memmove(dst.ptr, src.ptr, len * 3);\n  }\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__copy_4_4(wuffs_base__slice_u8 dst,\n                                     wuffs_base__slice_u8 dst_palette,\n                                     wuffs_base__slice_u8 src) {\n  size_t dst_len4 = dst.len / 4;\n  size_t src_len4 = src.len / 4;\n  size_t len = dst_len4 <" +
-	" src_len4 ? dst_len4 : src_len4;\n  if (len > 0) {\n    memmove(dst.ptr, src.ptr, len * 4);\n  }\n  return len;\n}\n\n" +
-	"" +
-	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr_565__bgr(wuffs_base__slice_u8 dst,\n                                         wuffs_base__slice_u8 dst_palette,\n                                         wuffs_base__slice_u8 src) {\n  size_t dst_len2 = dst.len / 2;\n  size_t src_len3 = src.len / 3;\n  size_t len = dst_len2 < src_len3 ? dst_len2 : src_len3;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint32_t b5 = s[0] >> 3;\n    uint32_t g6 = s[1] >> 2;\n    uint32_t r5 = s[2] >> 3;\n    uint32_t rgb_565 = (r5 << 11) | (g6 << 5) | (b5 << 0);\n    wuffs_base__store_u16le__no_bounds_check(d + (0 * 2), (uint16_t)rgb_565);\n\n    s += 1 * 3;\n    d += 1 * 2;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src(\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  size_t dst_len2 = dst.len / 2;\n  size_t src_len4 = src.len / 4;\n  size_t len = " +
-	"dst_len2 < src_len4 ? dst_len2 : src_len4;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    wuffs_base__store_u16le__no_bounds_check(\n        d + (0 * 2),\n        wuffs_base__color_u32_argb_premul__as__color_u16_rgb_565(\n            wuffs_base__color_u32_argb_nonpremul__as__color_u32_argb_premul(\n                wuffs_base__load_u32le__no_bounds_check(s + (0 * 4)))));\n\n    s += 1 * 4;\n    d += 1 * 2;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src_over(\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  size_t dst_len2 = dst.len / 2;\n  size_t src_len4 = src.len / 4;\n  size_t len = dst_len2 < src_len4 ? dst_len2 : src_len4;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    // Convert from 8-bit color to 16-bit color.\n    uint32_t sa = 0x101 * ((uint32_t)s[3]);\n    uint32_t sr = 0" +
-	"x101 * ((uint32_t)s[2]);\n    uint32_t sg = 0x101 * ((uint32_t)s[1]);\n    uint32_t sb = 0x101 * ((uint32_t)s[0]);\n\n    // Convert from 565 color to 16-bit color.\n    uint32_t old_rgb_565 = wuffs_base__load_u16le__no_bounds_check(d + (0 * 2));\n    uint32_t old_r5 = 0x1F & (old_rgb_565 >> 11);\n    uint32_t dr = (0x8421 * old_r5) >> 4;\n    uint32_t old_g6 = 0x3F & (old_rgb_565 >> 5);\n    uint32_t dg = (0x1041 * old_g6) >> 2;\n    uint32_t old_b5 = 0x1F & (old_rgb_565 >> 0);\n    uint32_t db = (0x8421 * old_b5) >> 4;\n\n    // Calculate the inverse of the src-alpha: how much of the dst to keep.\n    uint32_t ia = 0xFFFF - sa;\n\n    // Composite src (nonpremul) over dst (premul).\n    dr = ((sr * sa) + (dr * ia)) / 0xFFFF;\n    dg = ((sg * sa) + (dg * ia)) / 0xFFFF;\n    db = ((sb * sa) + (db * ia)) / 0xFFFF;\n\n    // Convert from 16-bit color to 565 color and combine the components.\n    uint32_t new_r5 = 0x1F & (dr >> 11);\n    uint32_t new_g6 = 0x3F & (dg >> 10);\n    uint32_t new_b5 = 0x1F & (db >> 11);\n    uint32_t new_rgb" +
-	"_565 = (new_r5 << 11) | (new_g6 << 5) | (new_b5 << 0);\n    wuffs_base__store_u16le__no_bounds_check(d + (0 * 2),\n                                             (uint16_t)new_rgb_565);\n\n    s += 1 * 4;\n    d += 1 * 2;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr_565__y(wuffs_base__slice_u8 dst,\n                                       wuffs_base__slice_u8 dst_palette,\n                                       wuffs_base__slice_u8 src) {\n  size_t dst_len2 = dst.len / 2;\n  size_t len = dst_len2 < src.len ? dst_len2 : src.len;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint32_t y5 = s[0] >> 3;\n    uint32_t y6 = s[0] >> 2;\n    uint32_t rgb_565 = (y5 << 11) | (y6 << 5) | (y5 << 0);\n    wuffs_base__store_u16le__no_bounds_check(d + (0 * 2), (uint16_t)rgb_565);\n\n    s += 1 * 1;\n    d += 1 * 2;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr_565__index__src(\n    wuffs_base__slice_u8" +
-	" dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  if (dst_palette.len != 1024) {\n    return 0;\n  }\n  size_t dst_len2 = dst.len / 2;\n  size_t len = dst_len2 < src.len ? dst_len2 : src.len;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  const size_t loop_unroll_count = 4;\n\n  while (n >= loop_unroll_count) {\n    wuffs_base__store_u16le__no_bounds_check(\n        d + (0 * 2), wuffs_base__load_u16le__no_bounds_check(\n                         dst_palette.ptr + ((size_t)s[0] * 4)));\n    wuffs_base__store_u16le__no_bounds_check(\n        d + (1 * 2), wuffs_base__load_u16le__no_bounds_check(\n                         dst_palette.ptr + ((size_t)s[1] * 4)));\n    wuffs_base__store_u16le__no_bounds_check(\n        d + (2 * 2), wuffs_base__load_u16le__no_bounds_check(\n                         dst_palette.ptr + ((size_t)s[2] * 4)));\n    wuffs_base__store_u16le__no_bounds_check(\n        d + (3 * 2), wuffs_base__load_u16le__no_bounds_check(\n                         dst_palette.ptr" +
-	" + ((size_t)s[3] * 4)));\n\n    s += loop_unroll_count * 1;\n    d += loop_unroll_count * 2;\n    n -= loop_unroll_count;\n  }\n\n  while (n >= 1) {\n    wuffs_base__store_u16le__no_bounds_check(\n        d + (0 * 2), wuffs_base__load_u16le__no_bounds_check(\n                         dst_palette.ptr + ((size_t)s[0] * 4)));\n\n    s += 1 * 1;\n    d += 1 * 2;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr_565__index_binary_alpha__src_over(\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  if (dst_palette.len != 1024) {\n    return 0;\n  }\n  size_t dst_len2 = dst.len / 2;\n  size_t len = dst_len2 < src.len ? dst_len2 : src.len;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[0] * 4));\n    if (s0) {\n      wuffs_base__store_u16le__no_bounds" +
-	"_check(d + (0 * 2), (uint16_t)s0);\n    }\n\n    s += 1 * 1;\n    d += 1 * 2;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
-	"" +
-	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src(\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  size_t dst_len3 = dst.len / 3;\n  size_t src_len4 = src.len / 4;\n  size_t len = dst_len3 < src_len4 ? dst_len3 : src_len4;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint32_t s0 =\n        wuffs_base__color_u32_argb_nonpremul__as__color_u32_argb_premul(\n            wuffs_base__load_u32le__no_bounds_check(s + (0 * 4)));\n    wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);\n\n    s += 1 * 4;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src_over(\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  size_t dst_len3 = dst.len / 3;\n  size_t src_len4 = src.len / 4;\n  size_t len = dst_len3 < src_len4 ? dst_len3 : src_len4;\n  uint8_t* d = d" +
-	"st.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    // Convert from 8-bit color to 16-bit color.\n    uint32_t sa = 0x101 * ((uint32_t)s[3]);\n    uint32_t sr = 0x101 * ((uint32_t)s[2]);\n    uint32_t sg = 0x101 * ((uint32_t)s[1]);\n    uint32_t sb = 0x101 * ((uint32_t)s[0]);\n    uint32_t dr = 0x101 * ((uint32_t)d[2]);\n    uint32_t dg = 0x101 * ((uint32_t)d[1]);\n    uint32_t db = 0x101 * ((uint32_t)d[0]);\n\n    // Calculate the inverse of the src-alpha: how much of the dst to keep.\n    uint32_t ia = 0xFFFF - sa;\n\n    // Composite src (nonpremul) over dst (premul).\n    dr = ((sr * sa) + (dr * ia)) / 0xFFFF;\n    dg = ((sg * sa) + (dg * ia)) / 0xFFFF;\n    db = ((sb * sa) + (db * ia)) / 0xFFFF;\n\n    // Convert from 16-bit color to 8-bit color.\n    d[0] = (uint8_t)(db >> 8);\n    d[1] = (uint8_t)(dg >> 8);\n    d[2] = (uint8_t)(dr >> 8);\n\n    s += 1 * 4;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
-	"" +
-	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul__src_over(\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  size_t dst_len4 = dst.len / 4;\n  size_t src_len4 = src.len / 4;\n  size_t len = dst_len4 < src_len4 ? dst_len4 : src_len4;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint32_t d0 = wuffs_base__load_u32le__no_bounds_check(d + (0 * 4));\n    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(s + (0 * 4));\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (0 * 4),\n        wuffs_base__composite_nonpremul_nonpremul_u32_axxx(d0, s0));\n\n    s += 1 * 4;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
-	"" +
-	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src(\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  size_t dst_len4 = dst.len / 4;\n  size_t src_len4 = src.len / 4;\n  size_t len = dst_len4 < src_len4 ? dst_len4 : src_len4;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(s + (0 * 4));\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (0 * 4),\n        wuffs_base__color_u32_argb_nonpremul__as__color_u32_argb_premul(s0));\n\n    s += 1 * 4;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src_over(\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  size_t dst_len4 = dst.len / 4;\n  size_t src_len4 = src.len / 4;\n  size_t len = dst_len4 < src_len4 ? dst_len4 : src_len4;\n  ui" +
-	"nt8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint32_t d0 = wuffs_base__load_u32le__no_bounds_check(d + (0 * 4));\n    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(s + (0 * 4));\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (0 * 4), wuffs_base__composite_premul_nonpremul_u32_axxx(d0, s0));\n\n    s += 1 * 4;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
-	"" +
-	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__xxx__index__src(wuffs_base__slice_u8 dst,\n                                            wuffs_base__slice_u8 dst_palette,\n                                            wuffs_base__slice_u8 src) {\n  if (dst_palette.len != 1024) {\n    return 0;\n  }\n  size_t dst_len3 = dst.len / 3;\n  size_t len = dst_len3 < src.len ? dst_len3 : src.len;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  const size_t loop_unroll_count = 4;\n\n  // The comparison in the while condition is \">\", not \">=\", because with\n  // \">=\", the last 4-byte store could write past the end of the dst slice.\n  //\n  // Each 4-byte store writes one too many bytes, but a subsequent store\n  // will overwrite that with the correct byte. There is always another\n  // store, whether a 4-byte store in this loop or a 1-byte store in the\n  // next loop.\n  while (n > loop_unroll_count) {\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (0 * 3), wuffs_base__load_u32le__no_bounds_c" +
-	"heck(\n                         dst_palette.ptr + ((size_t)s[0] * 4)));\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (1 * 3), wuffs_base__load_u32le__no_bounds_check(\n                         dst_palette.ptr + ((size_t)s[1] * 4)));\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (2 * 3), wuffs_base__load_u32le__no_bounds_check(\n                         dst_palette.ptr + ((size_t)s[2] * 4)));\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (3 * 3), wuffs_base__load_u32le__no_bounds_check(\n                         dst_palette.ptr + ((size_t)s[3] * 4)));\n\n    s += loop_unroll_count * 1;\n    d += loop_unroll_count * 3;\n    n -= loop_unroll_count;\n  }\n\n  while (n >= 1) {\n    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[0] * 4));\n    wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);\n\n    s += 1 * 1;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__" +
-	"pixel_swizzler__xxx__index_binary_alpha__src_over(\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  if (dst_palette.len != 1024) {\n    return 0;\n  }\n  size_t dst_len3 = dst.len / 3;\n  size_t len = dst_len3 < src.len ? dst_len3 : src.len;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  const size_t loop_unroll_count = 4;\n\n  while (n >= loop_unroll_count) {\n    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[0] * 4));\n    if (s0) {\n      wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);\n    }\n    uint32_t s1 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[1] * 4));\n    if (s1) {\n      wuffs_base__store_u24le__no_bounds_check(d + (1 * 3), s1);\n    }\n    uint32_t s2 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                    " +
-	"                      ((size_t)s[2] * 4));\n    if (s2) {\n      wuffs_base__store_u24le__no_bounds_check(d + (2 * 3), s2);\n    }\n    uint32_t s3 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[3] * 4));\n    if (s3) {\n      wuffs_base__store_u24le__no_bounds_check(d + (3 * 3), s3);\n    }\n\n    s += loop_unroll_count * 1;\n    d += loop_unroll_count * 3;\n    n -= loop_unroll_count;\n  }\n\n  while (n >= 1) {\n    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[0] * 4));\n    if (s0) {\n      wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);\n    }\n\n    s += 1 * 1;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__xxx__y(wuffs_base__slice_u8 dst,\n                                   wuffs_base__slice_u8 dst_palette,\n                                   wuffs_base__slice_u8 src) {\n  size_t dst_len3 =" +
-	" dst.len / 3;\n  size_t len = dst_len3 < src.len ? dst_len3 : src.len;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint8_t s0 = s[0];\n    d[0] = s0;\n    d[1] = s0;\n    d[2] = s0;\n\n    s += 1 * 1;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
-	"" +
-	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__xxxx__index__src(wuffs_base__slice_u8 dst,\n                                             wuffs_base__slice_u8 dst_palette,\n                                             wuffs_base__slice_u8 src) {\n  if (dst_palette.len != 1024) {\n    return 0;\n  }\n  size_t dst_len4 = dst.len / 4;\n  size_t len = dst_len4 < src.len ? dst_len4 : src.len;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  const size_t loop_unroll_count = 4;\n\n  while (n >= loop_unroll_count) {\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (0 * 4), wuffs_base__load_u32le__no_bounds_check(\n                         dst_palette.ptr + ((size_t)s[0] * 4)));\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (1 * 4), wuffs_base__load_u32le__no_bounds_check(\n                         dst_palette.ptr + ((size_t)s[1] * 4)));\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (2 * 4), wuffs_base__load_u32le__no_bounds_check(\n                         dst_pale" +
-	"tte.ptr + ((size_t)s[2] * 4)));\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (3 * 4), wuffs_base__load_u32le__no_bounds_check(\n                         dst_palette.ptr + ((size_t)s[3] * 4)));\n\n    s += loop_unroll_count * 1;\n    d += loop_unroll_count * 4;\n    n -= loop_unroll_count;\n  }\n\n  while (n >= 1) {\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (0 * 4), wuffs_base__load_u32le__no_bounds_check(\n                         dst_palette.ptr + ((size_t)s[0] * 4)));\n\n    s += 1 * 1;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over(\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  if (dst_palette.len != 1024) {\n    return 0;\n  }\n  size_t dst_len4 = dst.len / 4;\n  size_t len = dst_len4 < src.len ? dst_len4 : src.len;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  const size_t loop_unroll_count = 4;\n\n  while (n >= loop_unroll_count)" +
-	" {\n    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[0] * 4));\n    if (s0) {\n      wuffs_base__store_u32le__no_bounds_check(d + (0 * 4), s0);\n    }\n    uint32_t s1 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[1] * 4));\n    if (s1) {\n      wuffs_base__store_u32le__no_bounds_check(d + (1 * 4), s1);\n    }\n    uint32_t s2 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[2] * 4));\n    if (s2) {\n      wuffs_base__store_u32le__no_bounds_check(d + (2 * 4), s2);\n    }\n    uint32_t s3 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[3] * 4));\n    if (s3) {\n      wuffs_base__store_u32le__no_bounds_check(d + (3 * 4), s3);\n    }\n\n    s += loop_unroll_count * 1;\n    d += loop_unroll_count *" +
-	" 4;\n    n -= loop_unroll_count;\n  }\n\n  while (n >= 1) {\n    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[0] * 4));\n    if (s0) {\n      wuffs_base__store_u32le__no_bounds_check(d + (0 * 4), s0);\n    }\n\n    s += 1 * 1;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__xxxx__xxx(wuffs_base__slice_u8 dst,\n                                      wuffs_base__slice_u8 dst_palette,\n                                      wuffs_base__slice_u8 src) {\n  size_t dst_len4 = dst.len / 4;\n  size_t src_len3 = src.len / 3;\n  size_t len = dst_len4 < src_len3 ? dst_len4 : src_len3;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (0 * 4),\n        0xFF000000 | wuffs_base__load_u24le__no_bounds_check(s + (0 * 3)));\n\n    s += 1 * 3;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n" +
-	"\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__xxxx__y(wuffs_base__slice_u8 dst,\n                                    wuffs_base__slice_u8 dst_palette,\n                                    wuffs_base__slice_u8 src) {\n  size_t dst_len4 = dst.len / 4;\n  size_t len = dst_len4 < src.len ? dst_len4 : src.len;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (0 * 4), 0xFF000000 | (0x010101 * (uint32_t)s[0]));\n\n    s += 1 * 1;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
-	"" +
-	"// --------\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__y(wuffs_base__pixel_swizzler* p,\n                                       wuffs_base__pixel_format dst_format,\n                                       wuffs_base__slice_u8 dst_palette,\n                                       wuffs_base__slice_u8 src_palette,\n                                       wuffs_base__pixel_blend blend) {\n  switch (dst_format.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      return wuffs_base__pixel_swizzler__bgr_565__y;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      return wuffs_base__pixel_swizzler__xxx__y;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WU" +
-	"FFS_BASE__PIXEL_FORMAT__RGBX:\n      return wuffs_base__pixel_swizzler__xxxx__y;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__indexed__bgra_binary(\n    wuffs_base__pixel_swizzler* p,\n    wuffs_base__pixel_format dst_format,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src_palette,\n    wuffs_base__pixel_blend blend) {\n  switch (dst_format.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY:\n      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=\n          1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__copy_1_1;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      if (wuffs_base__pixel_swizzler__squash_bgr_565_888(dst_palette,\n                                        " +
-	"                 src_palette) != 1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr_565__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr_565__index_binary_alpha__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=\n          1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__xxx__index_binary_alpha__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_pa" +
-	"lette) !=\n          1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      if (wuffs_base__pixel_swizzler__swap_rgbx_bgrx(dst_palette,\n                                                     src_palette) != 1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__xxx__index_binary_alpha__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n      if (wuffs_base__pixel_swizzler__" +
-	"swap_rgbx_bgrx(dst_palette,\n                                                     src_palette) != 1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over;\n      }\n      return NULL;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__bgr(wuffs_base__pixel_swizzler* p,\n                                         wuffs_base__pixel_format dst_format,\n                                         wuffs_base__slice_u8 dst_palette,\n                                         wuffs_base__slice_u8 src_palette,\n                                         wuffs_base__pixel_blend blend) {\n  switch (dst_format.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      return wuffs_base__pixel_swizzler__bgr_565__bgr;\n\n    case WUFFS_BASE__PIXEL_FORMAT__B" +
-	"GR:\n      return wuffs_base__pixel_swizzler__copy_3_3;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n      return wuffs_base__pixel_swizzler__xxxx__xxx;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n      // TODO.\n      break;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__bgra_nonpremul(\n    wuffs_base__pixel_swizzler* p,\n    wuffs_base__pixel_format dst_format,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src_palette,\n    wuffs_base__pixel_blend blend) {\n  switch (dst_format.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_b" +
-	"ase__pixel_swizzler__bgr_565__bgra_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__copy_4_4;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swiz" +
-	"zler__bgra_premul__bgra_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n      // TODO.\n      break;\n  }\n  return NULL;\n}\n\n" +
-	"" +
-	"// --------\n\nwuffs_base__status  //\nwuffs_base__pixel_swizzler__prepare(wuffs_base__pixel_swizzler* p,\n                                    wuffs_base__pixel_format dst_format,\n                                    wuffs_base__slice_u8 dst_palette,\n                                    wuffs_base__pixel_format src_format,\n                                    wuffs_base__slice_u8 src_palette,\n                                    wuffs_base__pixel_blend blend) {\n  if (!p) {\n    return wuffs_base__make_status(wuffs_base__error__bad_receiver);\n  }\n\n  // TODO: support many more formats.\n\n  wuffs_base__pixel_swizzler__func func = NULL;\n\n  switch (src_format.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__Y:\n      func = wuffs_base__pixel_swizzler__prepare__y(p, dst_format, dst_palette,\n                                                    src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY:\n      func = wuffs_base__pixel_swizzler__prepare__indexed__bgra_binary(\n          p, dst_format, dst_" +
-	"palette, src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      func = wuffs_base__pixel_swizzler__prepare__bgr(\n          p, dst_format, dst_palette, src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n      func = wuffs_base__pixel_swizzler__prepare__bgra_nonpremul(\n          p, dst_format, dst_palette, src_palette, blend);\n      break;\n  }\n\n  p->private_impl.func = func;\n  return wuffs_base__make_status(\n      func ? NULL : wuffs_base__error__unsupported_pixel_swizzler_option);\n}\n\nuint64_t  //\nwuffs_base__pixel_swizzler__swizzle_interleaved(\n    const wuffs_base__pixel_swizzler* p,\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  if (p && p->private_impl.func) {\n    return (*p->private_impl.func)(dst, dst_palette, src);\n  }\n  return 0;\n}\n" +
+	";\n        uint32_t pg = 0x101 * ((uint32_t)(palette_slice.ptr[(4 * i) + 1]));\n        uint32_t pr = 0x101 * ((uint32_t)(palette_slice.ptr[(4 * i) + 2]));\n        uint32_t pa = 0x101 * ((uint32_t)(palette_slice.ptr[(4 * i) + 3]));\n\n        // Convert to premultiplied alpha.\n        if (nonpremul && (pa != 0xFFFF)) {\n          pb = (pb * pa) / 0xFFFF;\n          pg = (pg * pa) / 0xFFFF;\n          pr = (pr * pa) / 0xFFFF;\n        }\n\n        // These deltas are conceptually int32_t (signed) but after squaring,\n        // it's equivalent to work in uint32_t (unsigned).\n        pb -= cb;\n        pg -= cg;\n        pr -= cr;\n        pa -= ca;\n        uint64_t score = ((uint64_t)(pb * pb)) + ((uint64_t)(pg * pg)) +\n                         ((uint64_t)(pr * pr)) + ((uint64_t)(pa * pa));\n        if (best_score > score) {\n          best_score = score;\n          best_index = i;\n        }\n      }\n      break;\n    }\n  }\n\n  return (uint8_t)best_index;\n}\n" +
 	""
 
 const baseStrConvImplC = "" +
@@ -105,8 +56,21 @@
 	"alue = +(int64_t)(r.value);\n      return ret;\n    }\n  } while (0);\n\nfail_bad_argument:\n  do {\n    wuffs_base__result_i64 ret;\n    ret.status.repr = wuffs_base__error__bad_argument;\n    ret.value = 0;\n    return ret;\n  } while (0);\n\nfail_out_of_bounds:\n  do {\n    wuffs_base__result_i64 ret;\n    ret.status.repr = wuffs_base__error__out_of_bounds;\n    ret.value = 0;\n    return ret;\n  } while (0);\n}\n\nwuffs_base__result_u64  //\nwuffs_base__parse_number_u64(wuffs_base__slice_u8 s) {\n  uint8_t* p = s.ptr;\n  uint8_t* q = s.ptr + s.len;\n\n  for (; (p < q) && (*p == '_'); p++) {\n  }\n\n  if (p >= q) {\n    goto fail_bad_argument;\n\n  } else if (*p == '0') {\n    p++;\n    if (p >= q) {\n      goto ok_zero;\n    }\n    if (*p == '_') {\n      p++;\n      for (; p < q; p++) {\n        if (*p != '_') {\n          goto fail_bad_argument;\n        }\n      }\n      goto ok_zero;\n    }\n\n    if ((*p == 'x') || (*p == 'X')) {\n      p++;\n      for (; (p < q) && (*p == '_'); p++) {\n      }\n      if (p < q) {\n        goto hexadecimal;\n      }\n\n  " +
 	"  } else if ((*p == 'd') || (*p == 'D')) {\n      p++;\n      for (; (p < q) && (*p == '_'); p++) {\n      }\n      if (p < q) {\n        goto decimal;\n      }\n    }\n\n    goto fail_bad_argument;\n  }\n\ndecimal:\n  do {\n    uint64_t v = wuffs_base__parse_number__decimal_digits[*p++];\n    if (v == 0) {\n      goto fail_bad_argument;\n    }\n    v &= 0x0F;\n\n    // UINT64_MAX is 18446744073709551615, which is ((10 * max10) + max1).\n    const uint64_t max10 = 1844674407370955161;\n    const uint8_t max1 = 5;\n\n    for (; p < q; p++) {\n      if (*p == '_') {\n        continue;\n      }\n      uint8_t digit = wuffs_base__parse_number__decimal_digits[*p];\n      if (digit == 0) {\n        goto fail_bad_argument;\n      }\n      digit &= 0x0F;\n      if ((v > max10) || ((v == max10) && (digit > max1))) {\n        goto fail_out_of_bounds;\n      }\n      v = (10 * v) + ((uint64_t)(digit));\n    }\n\n    wuffs_base__result_u64 ret;\n    ret.status.repr = NULL;\n    ret.value = v;\n    return ret;\n  } while (0);\n\nhexadecimal:\n  do {\n    uint64_t v = " +
 	"wuffs_base__parse_number__hexadecimal_digits[*p++];\n    if (v == 0) {\n      goto fail_bad_argument;\n    }\n    v &= 0x0F;\n\n    for (; p < q; p++) {\n      if (*p == '_') {\n        continue;\n      }\n      uint8_t digit = wuffs_base__parse_number__hexadecimal_digits[*p];\n      if (digit == 0) {\n        goto fail_bad_argument;\n      }\n      digit &= 0x0F;\n      if ((v >> 60) != 0) {\n        goto fail_out_of_bounds;\n      }\n      v = (v << 4) | ((uint64_t)(digit));\n    }\n\n    wuffs_base__result_u64 ret;\n    ret.status.repr = NULL;\n    ret.value = v;\n    return ret;\n  } while (0);\n\nok_zero:\n  do {\n    wuffs_base__result_u64 ret;\n    ret.status.repr = NULL;\n    ret.value = 0;\n    return ret;\n  } while (0);\n\nfail_bad_argument:\n  do {\n    wuffs_base__result_u64 ret;\n    ret.status.repr = wuffs_base__error__bad_argument;\n    ret.value = 0;\n    return ret;\n  } while (0);\n\nfail_out_of_bounds:\n  do {\n    wuffs_base__result_u64 ret;\n    ret.status.repr = wuffs_base__error__out_of_bounds;\n    ret.value = 0;\n    return ret;\n " +
-	" } while (0);\n}\n\n  " +
+	" } while (0);\n}\n\n" +
 	"" +
+	"// ---------------- Hexadecimal\n\nsize_t  //\nwuffs_base__hexadecimal__decode2(wuffs_base__slice_u8 dst,\n                                 wuffs_base__slice_u8 src) {\n  size_t src_len2 = src.len / 2;\n  size_t len = dst.len < src_len2 ? dst.len : src_len2;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  while (n--) {\n    *d = (uint8_t)((wuffs_base__parse_number__hexadecimal_digits[s[0]] << 4) |\n                   (wuffs_base__parse_number__hexadecimal_digits[s[1]] & 0x0F));\n    d += 1;\n    s += 2;\n  }\n\n  return len;\n}\n\nsize_t  //\nwuffs_base__hexadecimal__decode4(wuffs_base__slice_u8 dst,\n                                 wuffs_base__slice_u8 src) {\n  size_t src_len4 = src.len / 4;\n  size_t len = dst.len < src_len4 ? dst.len : src_len4;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  while (n--) {\n    *d = (uint8_t)((wuffs_base__parse_number__hexadecimal_digits[s[2]] << 4) |\n                   (wuffs_base__parse_number__hexadecimal_digits[s[3]] & 0x0F));\n    d += 1;\n    s +" +
+	"= 4;\n  }\n\n  return len;\n}\n\n" +
+	"" +
+	"// ---------------- Unicode and UTF-8\n\nsize_t  //\nwuffs_base__utf_8__encode(wuffs_base__slice_u8 dst, uint32_t code_point) {\n  if (code_point <= 0x7F) {\n    if (dst.len >= 1) {\n      dst.ptr[0] = (uint8_t)(code_point);\n      return 1;\n    }\n\n  } else if (code_point <= 0x07FF) {\n    if (dst.len >= 2) {\n      dst.ptr[0] = (uint8_t)(0xC0 | ((code_point >> 6)));\n      dst.ptr[1] = (uint8_t)(0x80 | ((code_point >> 0) & 0x3F));\n      return 2;\n    }\n\n  } else if (code_point <= 0xFFFF) {\n    if ((dst.len >= 3) && ((code_point < 0xD800) || (0xDFFF < code_point))) {\n      dst.ptr[0] = (uint8_t)(0xE0 | ((code_point >> 12)));\n      dst.ptr[1] = (uint8_t)(0x80 | ((code_point >> 6) & 0x3F));\n      dst.ptr[2] = (uint8_t)(0x80 | ((code_point >> 0) & 0x3F));\n      return 3;\n    }\n\n  } else if (code_point <= 0x10FFFF) {\n    if (dst.len >= 4) {\n      dst.ptr[0] = (uint8_t)(0xF0 | ((code_point >> 18)));\n      dst.ptr[1] = (uint8_t)(0x80 | ((code_point >> 12) & 0x3F));\n      dst.ptr[2] = (uint8_t)(0x80 | ((code_point >> 6) & 0x3" +
+	"F));\n      dst.ptr[3] = (uint8_t)(0x80 | ((code_point >> 0) & 0x3F));\n      return 4;\n    }\n  }\n\n  return 0;\n}\n\n// wuffs_base__utf_8__byte_length_minus_1 is the byte length (minus 1) of a\n// UTF-8 encoded code point, based on the encoding's initial byte.\n//  - 0x00 is 1-byte UTF-8 (ASCII).\n//  - 0x01 is the start of 2-byte UTF-8.\n//  - 0x02 is the start of 3-byte UTF-8.\n//  - 0x03 is the start of 4-byte UTF-8.\n//  - 0x40 is a UTF-8 tail byte.\n//  - 0x80 is invalid UTF-8.\n//\n// RFC 3629 (UTF-8) gives this grammar for valid UTF-8:\n//    UTF8-1      = %x00-7F\n//    UTF8-2      = %xC2-DF UTF8-tail\n//    UTF8-3      = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /\n//                  %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )\n//    UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /\n//                  %xF4 %x80-8F 2( UTF8-tail )\n//    UTF8-tail   = %x80-BF\nstatic const uint8_t wuffs_base__utf_8__byte_length_minus_1[256] = {\n    // 0     1     2     3     4     5     6     7\n    // 8     9" +
+	"     A     B     C     D     E     F\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x00 ..= 0x07.\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x08 ..= 0x0F.\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x10 ..= 0x17.\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x18 ..= 0x1F.\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x20 ..= 0x27.\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x28 ..= 0x2F.\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x30 ..= 0x37.\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x38 ..= 0x3F.\n\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x40 ..= 0x47.\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x48 ..= 0x4F.\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x50 ..= 0x57.\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x58 ..= 0x5F.\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x60 ..= 0x67.\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x68 .." +
+	"= 0x6F.\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x70 ..= 0x77.\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x78 ..= 0x7F.\n\n    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0x80 ..= 0x87.\n    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0x88 ..= 0x8F.\n    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0x90 ..= 0x97.\n    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0x98 ..= 0x9F.\n    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0xA0 ..= 0xA7.\n    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0xA8 ..= 0xAF.\n    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0xB0 ..= 0xB7.\n    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0xB8 ..= 0xBF.\n\n    0x80, 0x80, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0xC0 ..= 0xC7.\n    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0xC8 ..= 0xCF.\n    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0xD0 ..= 0xD7.\n    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0xD8 ..= 0xDF.\n    0x02, 0x02, 0x02" +
+	", 0x02, 0x02, 0x02, 0x02, 0x02,  // 0xE0 ..= 0xE7.\n    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,  // 0xE8 ..= 0xEF.\n    0x03, 0x03, 0x03, 0x03, 0x03, 0x80, 0x80, 0x80,  // 0xF0 ..= 0xF7.\n    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,  // 0xF8 ..= 0xFF.\n    // 0     1     2     3     4     5     6     7\n    // 8     9     A     B     C     D     E     F\n};\n\nwuffs_base__utf_8__next__output  //\nwuffs_base__utf_8__next(wuffs_base__slice_u8 s) {\n  if (s.len == 0) {\n    return wuffs_base__make_utf_8__next__output(0, 0);\n  }\n  uint32_t c = s.ptr[0];\n  switch (wuffs_base__utf_8__byte_length_minus_1[c & 0xFF]) {\n    case 0:\n      return wuffs_base__make_utf_8__next__output(c, 1);\n\n    case 1:\n      if (s.len < 2) {\n        break;\n      }\n      c = wuffs_base__load_u16le__no_bounds_check(s.ptr);\n      if ((c & 0xC000) != 0x8000) {\n        break;\n      }\n      c = (0x0007C0 & (c << 6)) | (0x00003F & (c >> 8));\n      return wuffs_base__make_utf_8__next__output(c, 2);\n\n    case 2:\n      if (s.len < 3) {\n       " +
+	" break;\n      }\n      c = wuffs_base__load_u24le__no_bounds_check(s.ptr);\n      if ((c & 0xC0C000) != 0x808000) {\n        break;\n      }\n      c = (0x00F000 & (c << 12)) | (0x000FC0 & (c >> 2)) |\n          (0x00003F & (c >> 16));\n      if ((c <= 0x07FF) || ((0xD800 <= c) && (c <= 0xDFFF))) {\n        break;\n      }\n      return wuffs_base__make_utf_8__next__output(c, 3);\n\n    case 3:\n      if (s.len < 4) {\n        break;\n      }\n      c = wuffs_base__load_u32le__no_bounds_check(s.ptr);\n      if ((c & 0xC0C0C000) != 0x80808000) {\n        break;\n      }\n      c = (0x1C0000 & (c << 18)) | (0x03F000 & (c << 4)) |\n          (0x000FC0 & (c >> 10)) | (0x00003F & (c >> 24));\n      if ((c <= 0xFFFF) || (0x110000 <= c)) {\n        break;\n      }\n      return wuffs_base__make_utf_8__next__output(c, 4);\n  }\n\n  return wuffs_base__make_utf_8__next__output(\n      WUFFS_BASE__UNICODE_REPLACEMENT_CHARACTER, 1);\n}\n\nsize_t  //\nwuffs_base__utf_8__longest_valid_prefix(wuffs_base__slice_u8 s) {\n  // TODO: possibly optimize the all-A" +
+	"SCII case (4 or 8 bytes at a time).\n  //\n  // TODO: possibly optimize this by manually inlining the\n  // wuffs_base__utf_8__next calls.\n  size_t original_len = s.len;\n  while (s.len > 0) {\n    wuffs_base__utf_8__next__output o = wuffs_base__utf_8__next(s);\n    if ((o.code_point > 0x7F) && (o.byte_length == 1)) {\n      break;\n    }\n    s.ptr += o.byte_length;\n    s.len -= o.byte_length;\n  }\n  return original_len - s.len;\n}\n\nsize_t  //\nwuffs_base__ascii__longest_valid_prefix(wuffs_base__slice_u8 s) {\n  // TODO: possibly optimize this by checking 4 or 8 bytes at a time.\n  uint8_t* original_ptr = s.ptr;\n  uint8_t* p = s.ptr;\n  uint8_t* q = s.ptr + s.len;\n  for (; (p != q) && ((*p & 0x80) == 0); p++) {\n  }\n  return (size_t)(p - original_ptr);\n}\n" +
+	""
+
+const baseF64ConvSubmoduleC = "" +
 	"// ---------------- IEEE 754 Floating Point\n\n#define WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE 1023\n#define WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION 500\n\n// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL is the largest N\n// such that ((10 << N) < (1 << 64)).\n#define WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL 60\n\n// wuffs_base__private_implementation__high_prec_dec (abbreviated as HPD) is a\n// fixed precision floating point decimal number, augmented with ±infinity\n// values, but it cannot represent NaN (Not a Number).\n//\n// \"High precision\" means that the mantissa holds 500 decimal digits. 500 is\n// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION.\n//\n// An HPD isn't for general purpose arithmetic, only for conversions to and\n// from IEEE 754 double-precision floating point, where the largest and\n// smallest positive, finite values are approximately 1.8e+308 and 4.9e-324.\n// HPD exponents above +1023 mean infinity, below -1023 mean zero. Th" +
 	"e ±1023\n// bounds are further away from zero than ±(324 + 500), where 500 and 1023 is\n// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION and\n// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE.\n//\n// digits[.. num_digits] are the number's digits in big-endian order. The\n// uint8_t values are in the range [0 ..= 9], not ['0' ..= '9'], where e.g. '7'\n// is the ASCII value 0x37.\n//\n// decimal_point is the index (within digits) of the decimal point. It may be\n// negative or be larger than num_digits, in which case the explicit digits are\n// padded with implicit zeroes.\n//\n// For example, if num_digits is 3 and digits is \"\\x07\\x08\\x09\":\n//   - A decimal_point of -2 means \".00789\"\n//   - A decimal_point of -1 means \".0789\"\n//   - A decimal_point of +0 means \".789\"\n//   - A decimal_point of +1 means \"7.89\"\n//   - A decimal_point of +2 means \"78.9\"\n//   - A decimal_point of +3 means \"789.\"\n//   - A decimal_point of +4 means \"7890.\"\n//   - A decimal_point of +5 means \"78900.\"\n//\n// As above, a" +
 	" decimal_point higher than +1023 means that the overall value is\n// infinity, lower than -1023 means zero.\n//\n// negative is a sign bit. An HPD can distinguish positive and negative zero.\n//\n// truncated is whether there are more than\n// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION digits, and at\n// least one of those extra digits are non-zero. The existence of long-tail\n// digits can affect rounding.\n//\n// The \"all fields are zero\" value is valid, and represents the number +0.\ntypedef struct {\n  uint32_t num_digits;\n  int32_t decimal_point;\n  bool negative;\n  bool truncated;\n  uint8_t digits[WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION];\n} wuffs_base__private_implementation__high_prec_dec;\n\n// wuffs_base__private_implementation__high_prec_dec__trim trims trailing\n// zeroes from the h->digits[.. h->num_digits] slice. They have no benefit,\n// since we explicitly track h->decimal_point.\n//\n// Preconditions:\n//  - h is non-NULL.\nstatic inline void  //\nwuffs_base__private_implementation_" +
@@ -162,18 +126,62 @@
 	"shift =\n          (n < num_powers)\n              ? powers[n]\n              : WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL;\n\n      wuffs_base__private_implementation__high_prec_dec__small_rshift(&h,\n                                                                      shift);\n      if (h.decimal_point <\n          -WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {\n        goto zero;\n      }\n      exp2 += (int32_t)shift;\n    }\n    // ...then we shift left, putting us in [½ .. 1].\n    while (h.decimal_point <= 0) {\n      uint32_t shift;\n      if (h.decimal_point == 0) {\n        if (h.digits[0] >= 5) {\n          break;\n        }\n        shift = (h.digits[0] <= 2) ? 2 : 1;\n      } else {\n        uint32_t n = (uint32_t)(-h.decimal_point);\n        shift = (n < num_powers)\n                    ? powers[n]\n                    : WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL;\n      }\n\n      wuffs_base__private_implementation__high_prec_dec__small_lshift(&h,\n                          " +
 	"                                            shift);\n      if (h.decimal_point >\n          +WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {\n        goto infinity;\n      }\n      exp2 -= (int32_t)shift;\n    }\n\n    // We're in the range [½ .. 1] but f64 uses [1 .. 2].\n    exp2--;\n\n    // The minimum normal exponent is (f64_bias + 1).\n    while ((f64_bias + 1) > exp2) {\n      uint32_t n = (uint32_t)((f64_bias + 1) - exp2);\n      if (n > WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL) {\n        n = WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL;\n      }\n      wuffs_base__private_implementation__high_prec_dec__small_rshift(&h, n);\n      exp2 += (int32_t)n;\n    }\n\n    // Check for overflow.\n    if ((exp2 - f64_bias) >= 0x07FF) {  // (1 << 11) - 1.\n      goto infinity;\n    }\n\n    // Extract 53 bits for the mantissa (in base-2).\n    wuffs_base__private_implementation__high_prec_dec__small_lshift(&h, 53);\n    uint64_t man2 =\n        wuffs_base__private_implementation__high_prec_dec_" +
 	"_rounded_integer(&h);\n\n    // Rounding might have added one bit. If so, shift and re-check overflow.\n    if ((man2 >> 53) != 0) {\n      man2 >>= 1;\n      exp2++;\n      if ((exp2 - f64_bias) >= 0x07FF) {  // (1 << 11) - 1.\n        goto infinity;\n      }\n    }\n\n    // Handle subnormal numbers.\n    if ((man2 >> 52) == 0) {\n      exp2 = f64_bias;\n    }\n\n    // Pack the bits and return.\n    uint64_t exp2_bits =\n        (uint64_t)((exp2 - f64_bias) & 0x07FF);             // (1 << 11) - 1.\n    uint64_t bits = (man2 & 0x000FFFFFFFFFFFFF) |           // (1 << 52) - 1.\n                    (exp2_bits << 52) |                     //\n                    (h.negative ? 0x8000000000000000 : 0);  // (1 << 63).\n\n    wuffs_base__result_f64 ret;\n    ret.status.repr = NULL;\n    ret.value = wuffs_base__ieee_754_bit_representation__to_f64(bits);\n    return ret;\n  } while (0);\n\nzero:\n  do {\n    uint64_t bits = h.negative ? 0x8000000000000000 : 0;\n\n    wuffs_base__result_f64 ret;\n    ret.status.repr = NULL;\n    ret.value = wuffs_base" +
-	"__ieee_754_bit_representation__to_f64(bits);\n    return ret;\n  } while (0);\n\ninfinity:\n  do {\n    uint64_t bits = h.negative ? 0xFFF0000000000000 : 0x7FF0000000000000;\n\n    wuffs_base__result_f64 ret;\n    ret.status.repr = NULL;\n    ret.value = wuffs_base__ieee_754_bit_representation__to_f64(bits);\n    return ret;\n  } while (0);\n}\n\n" +
+	"__ieee_754_bit_representation__to_f64(bits);\n    return ret;\n  } while (0);\n\ninfinity:\n  do {\n    uint64_t bits = h.negative ? 0xFFF0000000000000 : 0x7FF0000000000000;\n\n    wuffs_base__result_f64 ret;\n    ret.status.repr = NULL;\n    ret.value = wuffs_base__ieee_754_bit_representation__to_f64(bits);\n    return ret;\n  } while (0);\n}\n" +
+	""
+
+const basePixConvSubmoduleC = "" +
+	"// ---------------- Pixel Swizzler\n\nstatic inline uint32_t  //\nwuffs_base__composite_nonpremul_nonpremul_u32_axxx(uint32_t dst_nonpremul,\n                                                   uint32_t src_nonpremul) {\n  // Convert from 8-bit color to 16-bit color.\n  uint32_t sa = 0x101 * (0xFF & (src_nonpremul >> 24));\n  uint32_t sr = 0x101 * (0xFF & (src_nonpremul >> 16));\n  uint32_t sg = 0x101 * (0xFF & (src_nonpremul >> 8));\n  uint32_t sb = 0x101 * (0xFF & (src_nonpremul >> 0));\n  uint32_t da = 0x101 * (0xFF & (dst_nonpremul >> 24));\n  uint32_t dr = 0x101 * (0xFF & (dst_nonpremul >> 16));\n  uint32_t dg = 0x101 * (0xFF & (dst_nonpremul >> 8));\n  uint32_t db = 0x101 * (0xFF & (dst_nonpremul >> 0));\n\n  // Convert dst from nonpremul to premul.\n  dr = (dr * da) / 0xFFFF;\n  dg = (dg * da) / 0xFFFF;\n  db = (db * da) / 0xFFFF;\n\n  // Calculate the inverse of the src-alpha: how much of the dst to keep.\n  uint32_t ia = 0xFFFF - sa;\n\n  // Composite src (nonpremul) over dst (premul).\n  da = sa + ((da * ia) / 0xFFFF);\n  dr" +
+	" = ((sr * sa) + (dr * ia)) / 0xFFFF;\n  dg = ((sg * sa) + (dg * ia)) / 0xFFFF;\n  db = ((sb * sa) + (db * ia)) / 0xFFFF;\n\n  // Convert dst from premul to nonpremul.\n  if (da != 0) {\n    dr = (dr * 0xFFFF) / da;\n    dg = (dg * 0xFFFF) / da;\n    db = (db * 0xFFFF) / da;\n  }\n\n  // Convert from 16-bit color to 8-bit color and combine the components.\n  da >>= 8;\n  dr >>= 8;\n  dg >>= 8;\n  db >>= 8;\n  return (db << 0) | (dg << 8) | (dr << 16) | (da << 24);\n}\n\nstatic inline uint32_t  //\nwuffs_base__composite_nonpremul_premul_u32_axxx(uint32_t dst_nonpremul,\n                                                uint32_t src_premul) {\n  // Convert from 8-bit color to 16-bit color.\n  uint32_t sa = 0x101 * (0xFF & (src_premul >> 24));\n  uint32_t sr = 0x101 * (0xFF & (src_premul >> 16));\n  uint32_t sg = 0x101 * (0xFF & (src_premul >> 8));\n  uint32_t sb = 0x101 * (0xFF & (src_premul >> 0));\n  uint32_t da = 0x101 * (0xFF & (dst_nonpremul >> 24));\n  uint32_t dr = 0x101 * (0xFF & (dst_nonpremul >> 16));\n  uint32_t dg = 0x101 * (0xFF " +
+	"& (dst_nonpremul >> 8));\n  uint32_t db = 0x101 * (0xFF & (dst_nonpremul >> 0));\n\n  // Convert dst from nonpremul to premul.\n  dr = (dr * da) / 0xFFFF;\n  dg = (dg * da) / 0xFFFF;\n  db = (db * da) / 0xFFFF;\n\n  // Calculate the inverse of the src-alpha: how much of the dst to keep.\n  uint32_t ia = 0xFFFF - sa;\n\n  // Composite src (premul) over dst (premul).\n  da = sa + ((da * ia) / 0xFFFF);\n  dr = sr + ((dr * ia) / 0xFFFF);\n  dg = sg + ((dg * ia) / 0xFFFF);\n  db = sb + ((db * ia) / 0xFFFF);\n\n  // Convert dst from premul to nonpremul.\n  if (da != 0) {\n    dr = (dr * 0xFFFF) / da;\n    dg = (dg * 0xFFFF) / da;\n    db = (db * 0xFFFF) / da;\n  }\n\n  // Convert from 16-bit color to 8-bit color and combine the components.\n  da >>= 8;\n  dr >>= 8;\n  dg >>= 8;\n  db >>= 8;\n  return (db << 0) | (dg << 8) | (dr << 16) | (da << 24);\n}\n\nstatic inline uint32_t  //\nwuffs_base__composite_premul_nonpremul_u32_axxx(uint32_t dst_premul,\n                                                uint32_t src_nonpremul) {\n  // Convert from 8-bit c" +
+	"olor to 16-bit color.\n  uint32_t sa = 0x101 * (0xFF & (src_nonpremul >> 24));\n  uint32_t sr = 0x101 * (0xFF & (src_nonpremul >> 16));\n  uint32_t sg = 0x101 * (0xFF & (src_nonpremul >> 8));\n  uint32_t sb = 0x101 * (0xFF & (src_nonpremul >> 0));\n  uint32_t da = 0x101 * (0xFF & (dst_premul >> 24));\n  uint32_t dr = 0x101 * (0xFF & (dst_premul >> 16));\n  uint32_t dg = 0x101 * (0xFF & (dst_premul >> 8));\n  uint32_t db = 0x101 * (0xFF & (dst_premul >> 0));\n\n  // Calculate the inverse of the src-alpha: how much of the dst to keep.\n  uint32_t ia = 0xFFFF - sa;\n\n  // Composite src (nonpremul) over dst (premul).\n  da = sa + ((da * ia) / 0xFFFF);\n  dr = ((sr * sa) + (dr * ia)) / 0xFFFF;\n  dg = ((sg * sa) + (dg * ia)) / 0xFFFF;\n  db = ((sb * sa) + (db * ia)) / 0xFFFF;\n\n  // Convert from 16-bit color to 8-bit color and combine the components.\n  da >>= 8;\n  dr >>= 8;\n  dg >>= 8;\n  db >>= 8;\n  return (db << 0) | (dg << 8) | (dr << 16) | (da << 24);\n}\n\nstatic inline uint32_t  //\nwuffs_base__composite_premul_premul_u32_axxx(ui" +
+	"nt32_t dst_premul,\n                                             uint32_t src_premul) {\n  // Convert from 8-bit color to 16-bit color.\n  uint32_t sa = 0x101 * (0xFF & (src_premul >> 24));\n  uint32_t sr = 0x101 * (0xFF & (src_premul >> 16));\n  uint32_t sg = 0x101 * (0xFF & (src_premul >> 8));\n  uint32_t sb = 0x101 * (0xFF & (src_premul >> 0));\n  uint32_t da = 0x101 * (0xFF & (dst_premul >> 24));\n  uint32_t dr = 0x101 * (0xFF & (dst_premul >> 16));\n  uint32_t dg = 0x101 * (0xFF & (dst_premul >> 8));\n  uint32_t db = 0x101 * (0xFF & (dst_premul >> 0));\n\n  // Calculate the inverse of the src-alpha: how much of the dst to keep.\n  uint32_t ia = 0xFFFF - sa;\n\n  // Composite src (premul) over dst (premul).\n  da = sa + ((da * ia) / 0xFFFF);\n  dr = sr + ((dr * ia) / 0xFFFF);\n  dg = sg + ((dg * ia) / 0xFFFF);\n  db = sb + ((db * ia) / 0xFFFF);\n\n  // Convert from 16-bit color to 8-bit color and combine the components.\n  da >>= 8;\n  dr >>= 8;\n  dg >>= 8;\n  db >>= 8;\n  return (db << 0) | (dg << 8) | (dr << 16) | (da << 24);\n}" +
+	"\n\n" +
 	"" +
-	"// ---------------- Hexadecimal\n\nsize_t  //\nwuffs_base__hexadecimal__decode2(wuffs_base__slice_u8 dst,\n                                 wuffs_base__slice_u8 src) {\n  size_t src_len2 = src.len / 2;\n  size_t len = dst.len < src_len2 ? dst.len : src_len2;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  while (n--) {\n    *d = (uint8_t)((wuffs_base__parse_number__hexadecimal_digits[s[0]] << 4) |\n                   (wuffs_base__parse_number__hexadecimal_digits[s[1]] & 0x0F));\n    d += 1;\n    s += 2;\n  }\n\n  return len;\n}\n\nsize_t  //\nwuffs_base__hexadecimal__decode4(wuffs_base__slice_u8 dst,\n                                 wuffs_base__slice_u8 src) {\n  size_t src_len4 = src.len / 4;\n  size_t len = dst.len < src_len4 ? dst.len : src_len4;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  while (n--) {\n    *d = (uint8_t)((wuffs_base__parse_number__hexadecimal_digits[s[2]] << 4) |\n                   (wuffs_base__parse_number__hexadecimal_digits[s[3]] & 0x0F));\n    d += 1;\n    s +" +
-	"= 4;\n  }\n\n  return len;\n}\n\n" +
+	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__squash_bgr_565_888(wuffs_base__slice_u8 dst,\n                                               wuffs_base__slice_u8 src) {\n  size_t len4 = (dst.len < src.len ? dst.len : src.len) / 4;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n\n  size_t n = len4;\n  while (n--) {\n    uint32_t argb = wuffs_base__load_u32le__no_bounds_check(s);\n    uint32_t b5 = 0x1F & (argb >> (8 - 5));\n    uint32_t g6 = 0x3F & (argb >> (16 - 6));\n    uint32_t r5 = 0x1F & (argb >> (24 - 5));\n    uint32_t alpha = argb & 0xFF000000;\n    wuffs_base__store_u32le__no_bounds_check(\n        d, alpha | (r5 << 11) | (g6 << 5) | (b5 << 0));\n    s += 4;\n    d += 4;\n  }\n  return len4 * 4;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__swap_rgbx_bgrx(wuffs_base__slice_u8 dst,\n                                           wuffs_base__slice_u8 src) {\n  size_t len4 = (dst.len < src.len ? dst.len : src.len) / 4;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n\n  size_t n = len4;\n  while (n--) {\n   " +
+	" uint8_t b0 = s[0];\n    uint8_t b1 = s[1];\n    uint8_t b2 = s[2];\n    uint8_t b3 = s[3];\n    d[0] = b2;\n    d[1] = b1;\n    d[2] = b0;\n    d[3] = b3;\n    s += 4;\n    d += 4;\n  }\n  return len4 * 4;\n}\n\n" +
 	"" +
-	"// ---------------- Unicode and UTF-8\n\nsize_t  //\nwuffs_base__utf_8__encode(wuffs_base__slice_u8 dst, uint32_t code_point) {\n  if (code_point <= 0x7F) {\n    if (dst.len >= 1) {\n      dst.ptr[0] = (uint8_t)(code_point);\n      return 1;\n    }\n\n  } else if (code_point <= 0x07FF) {\n    if (dst.len >= 2) {\n      dst.ptr[0] = (uint8_t)(0xC0 | ((code_point >> 6)));\n      dst.ptr[1] = (uint8_t)(0x80 | ((code_point >> 0) & 0x3F));\n      return 2;\n    }\n\n  } else if (code_point <= 0xFFFF) {\n    if ((dst.len >= 3) && ((code_point < 0xD800) || (0xDFFF < code_point))) {\n      dst.ptr[0] = (uint8_t)(0xE0 | ((code_point >> 12)));\n      dst.ptr[1] = (uint8_t)(0x80 | ((code_point >> 6) & 0x3F));\n      dst.ptr[2] = (uint8_t)(0x80 | ((code_point >> 0) & 0x3F));\n      return 3;\n    }\n\n  } else if (code_point <= 0x10FFFF) {\n    if (dst.len >= 4) {\n      dst.ptr[0] = (uint8_t)(0xF0 | ((code_point >> 18)));\n      dst.ptr[1] = (uint8_t)(0x80 | ((code_point >> 12) & 0x3F));\n      dst.ptr[2] = (uint8_t)(0x80 | ((code_point >> 6) & 0x3" +
-	"F));\n      dst.ptr[3] = (uint8_t)(0x80 | ((code_point >> 0) & 0x3F));\n      return 4;\n    }\n  }\n\n  return 0;\n}\n\n// wuffs_base__utf_8__byte_length_minus_1 is the byte length (minus 1) of a\n// UTF-8 encoded code point, based on the encoding's initial byte.\n//  - 0x00 is 1-byte UTF-8 (ASCII).\n//  - 0x01 is the start of 2-byte UTF-8.\n//  - 0x02 is the start of 3-byte UTF-8.\n//  - 0x03 is the start of 4-byte UTF-8.\n//  - 0x40 is a UTF-8 tail byte.\n//  - 0x80 is invalid UTF-8.\n//\n// RFC 3629 (UTF-8) gives this grammar for valid UTF-8:\n//    UTF8-1      = %x00-7F\n//    UTF8-2      = %xC2-DF UTF8-tail\n//    UTF8-3      = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /\n//                  %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )\n//    UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /\n//                  %xF4 %x80-8F 2( UTF8-tail )\n//    UTF8-tail   = %x80-BF\nstatic const uint8_t wuffs_base__utf_8__byte_length_minus_1[256] = {\n    // 0     1     2     3     4     5     6     7\n    // 8     9" +
-	"     A     B     C     D     E     F\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x00 ..= 0x07.\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x08 ..= 0x0F.\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x10 ..= 0x17.\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x18 ..= 0x1F.\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x20 ..= 0x27.\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x28 ..= 0x2F.\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x30 ..= 0x37.\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x38 ..= 0x3F.\n\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x40 ..= 0x47.\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x48 ..= 0x4F.\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x50 ..= 0x57.\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x58 ..= 0x5F.\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x60 ..= 0x67.\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x68 .." +
-	"= 0x6F.\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x70 ..= 0x77.\n    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x78 ..= 0x7F.\n\n    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0x80 ..= 0x87.\n    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0x88 ..= 0x8F.\n    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0x90 ..= 0x97.\n    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0x98 ..= 0x9F.\n    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0xA0 ..= 0xA7.\n    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0xA8 ..= 0xAF.\n    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0xB0 ..= 0xB7.\n    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0xB8 ..= 0xBF.\n\n    0x80, 0x80, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0xC0 ..= 0xC7.\n    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0xC8 ..= 0xCF.\n    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0xD0 ..= 0xD7.\n    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0xD8 ..= 0xDF.\n    0x02, 0x02, 0x02" +
-	", 0x02, 0x02, 0x02, 0x02, 0x02,  // 0xE0 ..= 0xE7.\n    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,  // 0xE8 ..= 0xEF.\n    0x03, 0x03, 0x03, 0x03, 0x03, 0x80, 0x80, 0x80,  // 0xF0 ..= 0xF7.\n    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,  // 0xF8 ..= 0xFF.\n    // 0     1     2     3     4     5     6     7\n    // 8     9     A     B     C     D     E     F\n};\n\nwuffs_base__utf_8__next__output  //\nwuffs_base__utf_8__next(wuffs_base__slice_u8 s) {\n  if (s.len == 0) {\n    return wuffs_base__make_utf_8__next__output(0, 0);\n  }\n  uint32_t c = s.ptr[0];\n  switch (wuffs_base__utf_8__byte_length_minus_1[c & 0xFF]) {\n    case 0:\n      return wuffs_base__make_utf_8__next__output(c, 1);\n\n    case 1:\n      if (s.len < 2) {\n        break;\n      }\n      c = wuffs_base__load_u16le__no_bounds_check(s.ptr);\n      if ((c & 0xC000) != 0x8000) {\n        break;\n      }\n      c = (0x0007C0 & (c << 6)) | (0x00003F & (c >> 8));\n      return wuffs_base__make_utf_8__next__output(c, 2);\n\n    case 2:\n      if (s.len < 3) {\n       " +
-	" break;\n      }\n      c = wuffs_base__load_u24le__no_bounds_check(s.ptr);\n      if ((c & 0xC0C000) != 0x808000) {\n        break;\n      }\n      c = (0x00F000 & (c << 12)) | (0x000FC0 & (c >> 2)) |\n          (0x00003F & (c >> 16));\n      if ((c <= 0x07FF) || ((0xD800 <= c) && (c <= 0xDFFF))) {\n        break;\n      }\n      return wuffs_base__make_utf_8__next__output(c, 3);\n\n    case 3:\n      if (s.len < 4) {\n        break;\n      }\n      c = wuffs_base__load_u32le__no_bounds_check(s.ptr);\n      if ((c & 0xC0C0C000) != 0x80808000) {\n        break;\n      }\n      c = (0x1C0000 & (c << 18)) | (0x03F000 & (c << 4)) |\n          (0x000FC0 & (c >> 10)) | (0x00003F & (c >> 24));\n      if ((c <= 0xFFFF) || (0x110000 <= c)) {\n        break;\n      }\n      return wuffs_base__make_utf_8__next__output(c, 4);\n  }\n\n  return wuffs_base__make_utf_8__next__output(\n      WUFFS_BASE__UNICODE_REPLACEMENT_CHARACTER, 1);\n}\n\nsize_t  //\nwuffs_base__utf_8__longest_valid_prefix(wuffs_base__slice_u8 s) {\n  // TODO: possibly optimize the all-A" +
-	"SCII case (4 or 8 bytes at a time).\n  //\n  // TODO: possibly optimize this by manually inlining the\n  // wuffs_base__utf_8__next calls.\n  size_t original_len = s.len;\n  while (s.len > 0) {\n    wuffs_base__utf_8__next__output o = wuffs_base__utf_8__next(s);\n    if ((o.code_point > 0x7F) && (o.byte_length == 1)) {\n      break;\n    }\n    s.ptr += o.byte_length;\n    s.len -= o.byte_length;\n  }\n  return original_len - s.len;\n}\n\nsize_t  //\nwuffs_base__ascii__longest_valid_prefix(wuffs_base__slice_u8 s) {\n  // TODO: possibly optimize this by checking 4 or 8 bytes at a time.\n  uint8_t* original_ptr = s.ptr;\n  uint8_t* p = s.ptr;\n  uint8_t* q = s.ptr + s.len;\n  for (; (p != q) && ((*p & 0x80) == 0); p++) {\n  }\n  return (size_t)(p - original_ptr);\n}\n" +
+	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__copy_1_1(wuffs_base__slice_u8 dst,\n                                     wuffs_base__slice_u8 dst_palette,\n                                     wuffs_base__slice_u8 src) {\n  return wuffs_base__slice_u8__copy_from_slice(dst, src);\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__copy_3_3(wuffs_base__slice_u8 dst,\n                                     wuffs_base__slice_u8 dst_palette,\n                                     wuffs_base__slice_u8 src) {\n  size_t dst_len3 = dst.len / 3;\n  size_t src_len3 = src.len / 3;\n  size_t len = dst_len3 < src_len3 ? dst_len3 : src_len3;\n  if (len > 0) {\n    memmove(dst.ptr, src.ptr, len * 3);\n  }\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__copy_4_4(wuffs_base__slice_u8 dst,\n                                     wuffs_base__slice_u8 dst_palette,\n                                     wuffs_base__slice_u8 src) {\n  size_t dst_len4 = dst.len / 4;\n  size_t src_len4 = src.len / 4;\n  size_t len = dst_len4 <" +
+	" src_len4 ? dst_len4 : src_len4;\n  if (len > 0) {\n    memmove(dst.ptr, src.ptr, len * 4);\n  }\n  return len;\n}\n\n" +
+	"" +
+	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr_565__bgr(wuffs_base__slice_u8 dst,\n                                         wuffs_base__slice_u8 dst_palette,\n                                         wuffs_base__slice_u8 src) {\n  size_t dst_len2 = dst.len / 2;\n  size_t src_len3 = src.len / 3;\n  size_t len = dst_len2 < src_len3 ? dst_len2 : src_len3;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint32_t b5 = s[0] >> 3;\n    uint32_t g6 = s[1] >> 2;\n    uint32_t r5 = s[2] >> 3;\n    uint32_t rgb_565 = (r5 << 11) | (g6 << 5) | (b5 << 0);\n    wuffs_base__store_u16le__no_bounds_check(d + (0 * 2), (uint16_t)rgb_565);\n\n    s += 1 * 3;\n    d += 1 * 2;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src(\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  size_t dst_len2 = dst.len / 2;\n  size_t src_len4 = src.len / 4;\n  size_t len = " +
+	"dst_len2 < src_len4 ? dst_len2 : src_len4;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    wuffs_base__store_u16le__no_bounds_check(\n        d + (0 * 2),\n        wuffs_base__color_u32_argb_premul__as__color_u16_rgb_565(\n            wuffs_base__color_u32_argb_nonpremul__as__color_u32_argb_premul(\n                wuffs_base__load_u32le__no_bounds_check(s + (0 * 4)))));\n\n    s += 1 * 4;\n    d += 1 * 2;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src_over(\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  size_t dst_len2 = dst.len / 2;\n  size_t src_len4 = src.len / 4;\n  size_t len = dst_len2 < src_len4 ? dst_len2 : src_len4;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    // Convert from 8-bit color to 16-bit color.\n    uint32_t sa = 0x101 * ((uint32_t)s[3]);\n    uint32_t sr = 0" +
+	"x101 * ((uint32_t)s[2]);\n    uint32_t sg = 0x101 * ((uint32_t)s[1]);\n    uint32_t sb = 0x101 * ((uint32_t)s[0]);\n\n    // Convert from 565 color to 16-bit color.\n    uint32_t old_rgb_565 = wuffs_base__load_u16le__no_bounds_check(d + (0 * 2));\n    uint32_t old_r5 = 0x1F & (old_rgb_565 >> 11);\n    uint32_t dr = (0x8421 * old_r5) >> 4;\n    uint32_t old_g6 = 0x3F & (old_rgb_565 >> 5);\n    uint32_t dg = (0x1041 * old_g6) >> 2;\n    uint32_t old_b5 = 0x1F & (old_rgb_565 >> 0);\n    uint32_t db = (0x8421 * old_b5) >> 4;\n\n    // Calculate the inverse of the src-alpha: how much of the dst to keep.\n    uint32_t ia = 0xFFFF - sa;\n\n    // Composite src (nonpremul) over dst (premul).\n    dr = ((sr * sa) + (dr * ia)) / 0xFFFF;\n    dg = ((sg * sa) + (dg * ia)) / 0xFFFF;\n    db = ((sb * sa) + (db * ia)) / 0xFFFF;\n\n    // Convert from 16-bit color to 565 color and combine the components.\n    uint32_t new_r5 = 0x1F & (dr >> 11);\n    uint32_t new_g6 = 0x3F & (dg >> 10);\n    uint32_t new_b5 = 0x1F & (db >> 11);\n    uint32_t new_rgb" +
+	"_565 = (new_r5 << 11) | (new_g6 << 5) | (new_b5 << 0);\n    wuffs_base__store_u16le__no_bounds_check(d + (0 * 2),\n                                             (uint16_t)new_rgb_565);\n\n    s += 1 * 4;\n    d += 1 * 2;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr_565__y(wuffs_base__slice_u8 dst,\n                                       wuffs_base__slice_u8 dst_palette,\n                                       wuffs_base__slice_u8 src) {\n  size_t dst_len2 = dst.len / 2;\n  size_t len = dst_len2 < src.len ? dst_len2 : src.len;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint32_t y5 = s[0] >> 3;\n    uint32_t y6 = s[0] >> 2;\n    uint32_t rgb_565 = (y5 << 11) | (y6 << 5) | (y5 << 0);\n    wuffs_base__store_u16le__no_bounds_check(d + (0 * 2), (uint16_t)rgb_565);\n\n    s += 1 * 1;\n    d += 1 * 2;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr_565__index__src(\n    wuffs_base__slice_u8" +
+	" dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  if (dst_palette.len != 1024) {\n    return 0;\n  }\n  size_t dst_len2 = dst.len / 2;\n  size_t len = dst_len2 < src.len ? dst_len2 : src.len;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  const size_t loop_unroll_count = 4;\n\n  while (n >= loop_unroll_count) {\n    wuffs_base__store_u16le__no_bounds_check(\n        d + (0 * 2), wuffs_base__load_u16le__no_bounds_check(\n                         dst_palette.ptr + ((size_t)s[0] * 4)));\n    wuffs_base__store_u16le__no_bounds_check(\n        d + (1 * 2), wuffs_base__load_u16le__no_bounds_check(\n                         dst_palette.ptr + ((size_t)s[1] * 4)));\n    wuffs_base__store_u16le__no_bounds_check(\n        d + (2 * 2), wuffs_base__load_u16le__no_bounds_check(\n                         dst_palette.ptr + ((size_t)s[2] * 4)));\n    wuffs_base__store_u16le__no_bounds_check(\n        d + (3 * 2), wuffs_base__load_u16le__no_bounds_check(\n                         dst_palette.ptr" +
+	" + ((size_t)s[3] * 4)));\n\n    s += loop_unroll_count * 1;\n    d += loop_unroll_count * 2;\n    n -= loop_unroll_count;\n  }\n\n  while (n >= 1) {\n    wuffs_base__store_u16le__no_bounds_check(\n        d + (0 * 2), wuffs_base__load_u16le__no_bounds_check(\n                         dst_palette.ptr + ((size_t)s[0] * 4)));\n\n    s += 1 * 1;\n    d += 1 * 2;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr_565__index_binary_alpha__src_over(\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  if (dst_palette.len != 1024) {\n    return 0;\n  }\n  size_t dst_len2 = dst.len / 2;\n  size_t len = dst_len2 < src.len ? dst_len2 : src.len;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[0] * 4));\n    if (s0) {\n      wuffs_base__store_u16le__no_bounds" +
+	"_check(d + (0 * 2), (uint16_t)s0);\n    }\n\n    s += 1 * 1;\n    d += 1 * 2;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
+	"" +
+	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src(\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  size_t dst_len3 = dst.len / 3;\n  size_t src_len4 = src.len / 4;\n  size_t len = dst_len3 < src_len4 ? dst_len3 : src_len4;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint32_t s0 =\n        wuffs_base__color_u32_argb_nonpremul__as__color_u32_argb_premul(\n            wuffs_base__load_u32le__no_bounds_check(s + (0 * 4)));\n    wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);\n\n    s += 1 * 4;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src_over(\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  size_t dst_len3 = dst.len / 3;\n  size_t src_len4 = src.len / 4;\n  size_t len = dst_len3 < src_len4 ? dst_len3 : src_len4;\n  uint8_t* d = d" +
+	"st.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    // Convert from 8-bit color to 16-bit color.\n    uint32_t sa = 0x101 * ((uint32_t)s[3]);\n    uint32_t sr = 0x101 * ((uint32_t)s[2]);\n    uint32_t sg = 0x101 * ((uint32_t)s[1]);\n    uint32_t sb = 0x101 * ((uint32_t)s[0]);\n    uint32_t dr = 0x101 * ((uint32_t)d[2]);\n    uint32_t dg = 0x101 * ((uint32_t)d[1]);\n    uint32_t db = 0x101 * ((uint32_t)d[0]);\n\n    // Calculate the inverse of the src-alpha: how much of the dst to keep.\n    uint32_t ia = 0xFFFF - sa;\n\n    // Composite src (nonpremul) over dst (premul).\n    dr = ((sr * sa) + (dr * ia)) / 0xFFFF;\n    dg = ((sg * sa) + (dg * ia)) / 0xFFFF;\n    db = ((sb * sa) + (db * ia)) / 0xFFFF;\n\n    // Convert from 16-bit color to 8-bit color.\n    d[0] = (uint8_t)(db >> 8);\n    d[1] = (uint8_t)(dg >> 8);\n    d[2] = (uint8_t)(dr >> 8);\n\n    s += 1 * 4;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
+	"" +
+	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul__src_over(\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  size_t dst_len4 = dst.len / 4;\n  size_t src_len4 = src.len / 4;\n  size_t len = dst_len4 < src_len4 ? dst_len4 : src_len4;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint32_t d0 = wuffs_base__load_u32le__no_bounds_check(d + (0 * 4));\n    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(s + (0 * 4));\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (0 * 4),\n        wuffs_base__composite_nonpremul_nonpremul_u32_axxx(d0, s0));\n\n    s += 1 * 4;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
+	"" +
+	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src(\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  size_t dst_len4 = dst.len / 4;\n  size_t src_len4 = src.len / 4;\n  size_t len = dst_len4 < src_len4 ? dst_len4 : src_len4;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(s + (0 * 4));\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (0 * 4),\n        wuffs_base__color_u32_argb_nonpremul__as__color_u32_argb_premul(s0));\n\n    s += 1 * 4;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src_over(\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  size_t dst_len4 = dst.len / 4;\n  size_t src_len4 = src.len / 4;\n  size_t len = dst_len4 < src_len4 ? dst_len4 : src_len4;\n  ui" +
+	"nt8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint32_t d0 = wuffs_base__load_u32le__no_bounds_check(d + (0 * 4));\n    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(s + (0 * 4));\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (0 * 4), wuffs_base__composite_premul_nonpremul_u32_axxx(d0, s0));\n\n    s += 1 * 4;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
+	"" +
+	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__xxx__index__src(wuffs_base__slice_u8 dst,\n                                            wuffs_base__slice_u8 dst_palette,\n                                            wuffs_base__slice_u8 src) {\n  if (dst_palette.len != 1024) {\n    return 0;\n  }\n  size_t dst_len3 = dst.len / 3;\n  size_t len = dst_len3 < src.len ? dst_len3 : src.len;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  const size_t loop_unroll_count = 4;\n\n  // The comparison in the while condition is \">\", not \">=\", because with\n  // \">=\", the last 4-byte store could write past the end of the dst slice.\n  //\n  // Each 4-byte store writes one too many bytes, but a subsequent store\n  // will overwrite that with the correct byte. There is always another\n  // store, whether a 4-byte store in this loop or a 1-byte store in the\n  // next loop.\n  while (n > loop_unroll_count) {\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (0 * 3), wuffs_base__load_u32le__no_bounds_c" +
+	"heck(\n                         dst_palette.ptr + ((size_t)s[0] * 4)));\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (1 * 3), wuffs_base__load_u32le__no_bounds_check(\n                         dst_palette.ptr + ((size_t)s[1] * 4)));\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (2 * 3), wuffs_base__load_u32le__no_bounds_check(\n                         dst_palette.ptr + ((size_t)s[2] * 4)));\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (3 * 3), wuffs_base__load_u32le__no_bounds_check(\n                         dst_palette.ptr + ((size_t)s[3] * 4)));\n\n    s += loop_unroll_count * 1;\n    d += loop_unroll_count * 3;\n    n -= loop_unroll_count;\n  }\n\n  while (n >= 1) {\n    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[0] * 4));\n    wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);\n\n    s += 1 * 1;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__" +
+	"pixel_swizzler__xxx__index_binary_alpha__src_over(\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  if (dst_palette.len != 1024) {\n    return 0;\n  }\n  size_t dst_len3 = dst.len / 3;\n  size_t len = dst_len3 < src.len ? dst_len3 : src.len;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  const size_t loop_unroll_count = 4;\n\n  while (n >= loop_unroll_count) {\n    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[0] * 4));\n    if (s0) {\n      wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);\n    }\n    uint32_t s1 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[1] * 4));\n    if (s1) {\n      wuffs_base__store_u24le__no_bounds_check(d + (1 * 3), s1);\n    }\n    uint32_t s2 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                    " +
+	"                      ((size_t)s[2] * 4));\n    if (s2) {\n      wuffs_base__store_u24le__no_bounds_check(d + (2 * 3), s2);\n    }\n    uint32_t s3 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[3] * 4));\n    if (s3) {\n      wuffs_base__store_u24le__no_bounds_check(d + (3 * 3), s3);\n    }\n\n    s += loop_unroll_count * 1;\n    d += loop_unroll_count * 3;\n    n -= loop_unroll_count;\n  }\n\n  while (n >= 1) {\n    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[0] * 4));\n    if (s0) {\n      wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);\n    }\n\n    s += 1 * 1;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__xxx__y(wuffs_base__slice_u8 dst,\n                                   wuffs_base__slice_u8 dst_palette,\n                                   wuffs_base__slice_u8 src) {\n  size_t dst_len3 =" +
+	" dst.len / 3;\n  size_t len = dst_len3 < src.len ? dst_len3 : src.len;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    uint8_t s0 = s[0];\n    d[0] = s0;\n    d[1] = s0;\n    d[2] = s0;\n\n    s += 1 * 1;\n    d += 1 * 3;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
+	"" +
+	"// --------\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__xxxx__index__src(wuffs_base__slice_u8 dst,\n                                             wuffs_base__slice_u8 dst_palette,\n                                             wuffs_base__slice_u8 src) {\n  if (dst_palette.len != 1024) {\n    return 0;\n  }\n  size_t dst_len4 = dst.len / 4;\n  size_t len = dst_len4 < src.len ? dst_len4 : src.len;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  const size_t loop_unroll_count = 4;\n\n  while (n >= loop_unroll_count) {\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (0 * 4), wuffs_base__load_u32le__no_bounds_check(\n                         dst_palette.ptr + ((size_t)s[0] * 4)));\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (1 * 4), wuffs_base__load_u32le__no_bounds_check(\n                         dst_palette.ptr + ((size_t)s[1] * 4)));\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (2 * 4), wuffs_base__load_u32le__no_bounds_check(\n                         dst_pale" +
+	"tte.ptr + ((size_t)s[2] * 4)));\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (3 * 4), wuffs_base__load_u32le__no_bounds_check(\n                         dst_palette.ptr + ((size_t)s[3] * 4)));\n\n    s += loop_unroll_count * 1;\n    d += loop_unroll_count * 4;\n    n -= loop_unroll_count;\n  }\n\n  while (n >= 1) {\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (0 * 4), wuffs_base__load_u32le__no_bounds_check(\n                         dst_palette.ptr + ((size_t)s[0] * 4)));\n\n    s += 1 * 1;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over(\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  if (dst_palette.len != 1024) {\n    return 0;\n  }\n  size_t dst_len4 = dst.len / 4;\n  size_t len = dst_len4 < src.len ? dst_len4 : src.len;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  const size_t loop_unroll_count = 4;\n\n  while (n >= loop_unroll_count)" +
+	" {\n    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[0] * 4));\n    if (s0) {\n      wuffs_base__store_u32le__no_bounds_check(d + (0 * 4), s0);\n    }\n    uint32_t s1 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[1] * 4));\n    if (s1) {\n      wuffs_base__store_u32le__no_bounds_check(d + (1 * 4), s1);\n    }\n    uint32_t s2 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[2] * 4));\n    if (s2) {\n      wuffs_base__store_u32le__no_bounds_check(d + (2 * 4), s2);\n    }\n    uint32_t s3 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[3] * 4));\n    if (s3) {\n      wuffs_base__store_u32le__no_bounds_check(d + (3 * 4), s3);\n    }\n\n    s += loop_unroll_count * 1;\n    d += loop_unroll_count *" +
+	" 4;\n    n -= loop_unroll_count;\n  }\n\n  while (n >= 1) {\n    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +\n                                                          ((size_t)s[0] * 4));\n    if (s0) {\n      wuffs_base__store_u32le__no_bounds_check(d + (0 * 4), s0);\n    }\n\n    s += 1 * 1;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__xxxx__xxx(wuffs_base__slice_u8 dst,\n                                      wuffs_base__slice_u8 dst_palette,\n                                      wuffs_base__slice_u8 src) {\n  size_t dst_len4 = dst.len / 4;\n  size_t src_len3 = src.len / 3;\n  size_t len = dst_len4 < src_len3 ? dst_len4 : src_len3;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (0 * 4),\n        0xFF000000 | wuffs_base__load_u24le__no_bounds_check(s + (0 * 3)));\n\n    s += 1 * 3;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n" +
+	"\nstatic uint64_t  //\nwuffs_base__pixel_swizzler__xxxx__y(wuffs_base__slice_u8 dst,\n                                    wuffs_base__slice_u8 dst_palette,\n                                    wuffs_base__slice_u8 src) {\n  size_t dst_len4 = dst.len / 4;\n  size_t len = dst_len4 < src.len ? dst_len4 : src.len;\n  uint8_t* d = dst.ptr;\n  uint8_t* s = src.ptr;\n  size_t n = len;\n\n  // TODO: unroll.\n\n  while (n >= 1) {\n    wuffs_base__store_u32le__no_bounds_check(\n        d + (0 * 4), 0xFF000000 | (0x010101 * (uint32_t)s[0]));\n\n    s += 1 * 1;\n    d += 1 * 4;\n    n -= 1;\n  }\n\n  return len;\n}\n\n" +
+	"" +
+	"// --------\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__y(wuffs_base__pixel_swizzler* p,\n                                       wuffs_base__pixel_format dst_format,\n                                       wuffs_base__slice_u8 dst_palette,\n                                       wuffs_base__slice_u8 src_palette,\n                                       wuffs_base__pixel_blend blend) {\n  switch (dst_format.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      return wuffs_base__pixel_swizzler__bgr_565__y;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      return wuffs_base__pixel_swizzler__xxx__y;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WU" +
+	"FFS_BASE__PIXEL_FORMAT__RGBX:\n      return wuffs_base__pixel_swizzler__xxxx__y;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__indexed__bgra_binary(\n    wuffs_base__pixel_swizzler* p,\n    wuffs_base__pixel_format dst_format,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src_palette,\n    wuffs_base__pixel_blend blend) {\n  switch (dst_format.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY:\n      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=\n          1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__copy_1_1;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      if (wuffs_base__pixel_swizzler__squash_bgr_565_888(dst_palette,\n                                        " +
+	"                 src_palette) != 1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr_565__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr_565__index_binary_alpha__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=\n          1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__xxx__index_binary_alpha__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_pa" +
+	"lette) !=\n          1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n      if (wuffs_base__pixel_swizzler__swap_rgbx_bgrx(dst_palette,\n                                                     src_palette) != 1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__xxx__index_binary_alpha__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n      if (wuffs_base__pixel_swizzler__" +
+	"swap_rgbx_bgrx(dst_palette,\n                                                     src_palette) != 1024) {\n        return NULL;\n      }\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__xxxx__index__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over;\n      }\n      return NULL;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__bgr(wuffs_base__pixel_swizzler* p,\n                                         wuffs_base__pixel_format dst_format,\n                                         wuffs_base__slice_u8 dst_palette,\n                                         wuffs_base__slice_u8 src_palette,\n                                         wuffs_base__pixel_blend blend) {\n  switch (dst_format.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      return wuffs_base__pixel_swizzler__bgr_565__bgr;\n\n    case WUFFS_BASE__PIXEL_FORMAT__B" +
+	"GR:\n      return wuffs_base__pixel_swizzler__copy_3_3;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n      return wuffs_base__pixel_swizzler__xxxx__xxx;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n      // TODO.\n      break;\n  }\n  return NULL;\n}\n\nstatic wuffs_base__pixel_swizzler__func  //\nwuffs_base__pixel_swizzler__prepare__bgra_nonpremul(\n    wuffs_base__pixel_swizzler* p,\n    wuffs_base__pixel_format dst_format,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src_palette,\n    wuffs_base__pixel_blend blend) {\n  switch (dst_format.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_b" +
+	"ase__pixel_swizzler__bgr_565__bgra_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swizzler__copy_4_4;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:\n      switch (blend) {\n        case WUFFS_BASE__PIXEL_BLEND__SRC:\n          return wuffs_base__pixel_swiz" +
+	"zler__bgra_premul__bgra_nonpremul__src;\n        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:\n          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src_over;\n      }\n      return NULL;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__BGRX:\n      // TODO.\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__RGB:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:\n    case WUFFS_BASE__PIXEL_FORMAT__RGBX:\n      // TODO.\n      break;\n  }\n  return NULL;\n}\n\n" +
+	"" +
+	"// --------\n\nwuffs_base__status  //\nwuffs_base__pixel_swizzler__prepare(wuffs_base__pixel_swizzler* p,\n                                    wuffs_base__pixel_format dst_format,\n                                    wuffs_base__slice_u8 dst_palette,\n                                    wuffs_base__pixel_format src_format,\n                                    wuffs_base__slice_u8 src_palette,\n                                    wuffs_base__pixel_blend blend) {\n  if (!p) {\n    return wuffs_base__make_status(wuffs_base__error__bad_receiver);\n  }\n\n  // TODO: support many more formats.\n\n  wuffs_base__pixel_swizzler__func func = NULL;\n\n  switch (src_format.repr) {\n    case WUFFS_BASE__PIXEL_FORMAT__Y:\n      func = wuffs_base__pixel_swizzler__prepare__y(p, dst_format, dst_palette,\n                                                    src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY:\n      func = wuffs_base__pixel_swizzler__prepare__indexed__bgra_binary(\n          p, dst_format, dst_" +
+	"palette, src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGR:\n      func = wuffs_base__pixel_swizzler__prepare__bgr(\n          p, dst_format, dst_palette, src_palette, blend);\n      break;\n\n    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:\n      func = wuffs_base__pixel_swizzler__prepare__bgra_nonpremul(\n          p, dst_format, dst_palette, src_palette, blend);\n      break;\n  }\n\n  p->private_impl.func = func;\n  return wuffs_base__make_status(\n      func ? NULL : wuffs_base__error__unsupported_pixel_swizzler_option);\n}\n\nuint64_t  //\nwuffs_base__pixel_swizzler__swizzle_interleaved(\n    const wuffs_base__pixel_swizzler* p,\n    wuffs_base__slice_u8 dst,\n    wuffs_base__slice_u8 dst_palette,\n    wuffs_base__slice_u8 src) {\n  if (p && p->private_impl.func) {\n    return (*p->private_impl.func)(dst, dst_palette, src);\n  }\n  return 0;\n}\n" +
 	""
 
 const baseFundamentalPrivateH = "" +
diff --git a/internal/cgen/gen.go b/internal/cgen/gen.go
index 21a4f6d..b946828 100644
--- a/internal/cgen/gen.go
+++ b/internal/cgen/gen.go
@@ -77,6 +77,9 @@
 		{"base/image-impl.c", "baseImageImplC"},
 		{"base/strconv-impl.c", "baseStrConvImplC"},
 
+		{"base/f64conv-submodule.c", "baseF64ConvSubmoduleC"},
+		{"base/pixconv-submodule.c", "basePixConvSubmoduleC"},
+
 		{"base/fundamental-private.h", "baseFundamentalPrivateH"},
 		{"base/fundamental-public.h", "baseFundamentalPublicH"},
 		{"base/memory-private.h", "baseMemoryPrivateH"},
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 72a4d01..b5161ef 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -8531,51 +8531,6 @@
 
 // --------
 
-static uint64_t  //
-wuffs_base__pixel_swizzler__squash_bgr_565_888(wuffs_base__slice_u8 dst,
-                                               wuffs_base__slice_u8 src) {
-  size_t len4 = (dst.len < src.len ? dst.len : src.len) / 4;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-
-  size_t n = len4;
-  while (n--) {
-    uint32_t argb = wuffs_base__load_u32le__no_bounds_check(s);
-    uint32_t b5 = 0x1F & (argb >> (8 - 5));
-    uint32_t g6 = 0x3F & (argb >> (16 - 6));
-    uint32_t r5 = 0x1F & (argb >> (24 - 5));
-    uint32_t alpha = argb & 0xFF000000;
-    wuffs_base__store_u32le__no_bounds_check(
-        d, alpha | (r5 << 11) | (g6 << 5) | (b5 << 0));
-    s += 4;
-    d += 4;
-  }
-  return len4 * 4;
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__swap_rgbx_bgrx(wuffs_base__slice_u8 dst,
-                                           wuffs_base__slice_u8 src) {
-  size_t len4 = (dst.len < src.len ? dst.len : src.len) / 4;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-
-  size_t n = len4;
-  while (n--) {
-    uint8_t b0 = s[0];
-    uint8_t b1 = s[1];
-    uint8_t b2 = s[2];
-    uint8_t b3 = s[3];
-    d[0] = b2;
-    d[1] = b1;
-    d[2] = b0;
-    d[3] = b3;
-    s += 4;
-    d += 4;
-  }
-  return len4 * 4;
-}
-
 static inline uint32_t  //
 wuffs_base__swap_u32_argb_abgr(uint32_t u) {
   uint32_t o = u & 0xFF00FF00;
@@ -8586,152 +8541,6 @@
 
 // --------
 
-static inline uint32_t  //
-wuffs_base__composite_nonpremul_nonpremul_u32_axxx(uint32_t dst_nonpremul,
-                                                   uint32_t src_nonpremul) {
-  // Convert from 8-bit color to 16-bit color.
-  uint32_t sa = 0x101 * (0xFF & (src_nonpremul >> 24));
-  uint32_t sr = 0x101 * (0xFF & (src_nonpremul >> 16));
-  uint32_t sg = 0x101 * (0xFF & (src_nonpremul >> 8));
-  uint32_t sb = 0x101 * (0xFF & (src_nonpremul >> 0));
-  uint32_t da = 0x101 * (0xFF & (dst_nonpremul >> 24));
-  uint32_t dr = 0x101 * (0xFF & (dst_nonpremul >> 16));
-  uint32_t dg = 0x101 * (0xFF & (dst_nonpremul >> 8));
-  uint32_t db = 0x101 * (0xFF & (dst_nonpremul >> 0));
-
-  // Convert dst from nonpremul to premul.
-  dr = (dr * da) / 0xFFFF;
-  dg = (dg * da) / 0xFFFF;
-  db = (db * da) / 0xFFFF;
-
-  // Calculate the inverse of the src-alpha: how much of the dst to keep.
-  uint32_t ia = 0xFFFF - sa;
-
-  // Composite src (nonpremul) over dst (premul).
-  da = sa + ((da * ia) / 0xFFFF);
-  dr = ((sr * sa) + (dr * ia)) / 0xFFFF;
-  dg = ((sg * sa) + (dg * ia)) / 0xFFFF;
-  db = ((sb * sa) + (db * ia)) / 0xFFFF;
-
-  // Convert dst from premul to nonpremul.
-  if (da != 0) {
-    dr = (dr * 0xFFFF) / da;
-    dg = (dg * 0xFFFF) / da;
-    db = (db * 0xFFFF) / da;
-  }
-
-  // Convert from 16-bit color to 8-bit color and combine the components.
-  da >>= 8;
-  dr >>= 8;
-  dg >>= 8;
-  db >>= 8;
-  return (db << 0) | (dg << 8) | (dr << 16) | (da << 24);
-}
-
-static inline uint32_t  //
-wuffs_base__composite_nonpremul_premul_u32_axxx(uint32_t dst_nonpremul,
-                                                uint32_t src_premul) {
-  // Convert from 8-bit color to 16-bit color.
-  uint32_t sa = 0x101 * (0xFF & (src_premul >> 24));
-  uint32_t sr = 0x101 * (0xFF & (src_premul >> 16));
-  uint32_t sg = 0x101 * (0xFF & (src_premul >> 8));
-  uint32_t sb = 0x101 * (0xFF & (src_premul >> 0));
-  uint32_t da = 0x101 * (0xFF & (dst_nonpremul >> 24));
-  uint32_t dr = 0x101 * (0xFF & (dst_nonpremul >> 16));
-  uint32_t dg = 0x101 * (0xFF & (dst_nonpremul >> 8));
-  uint32_t db = 0x101 * (0xFF & (dst_nonpremul >> 0));
-
-  // Convert dst from nonpremul to premul.
-  dr = (dr * da) / 0xFFFF;
-  dg = (dg * da) / 0xFFFF;
-  db = (db * da) / 0xFFFF;
-
-  // Calculate the inverse of the src-alpha: how much of the dst to keep.
-  uint32_t ia = 0xFFFF - sa;
-
-  // Composite src (premul) over dst (premul).
-  da = sa + ((da * ia) / 0xFFFF);
-  dr = sr + ((dr * ia) / 0xFFFF);
-  dg = sg + ((dg * ia) / 0xFFFF);
-  db = sb + ((db * ia) / 0xFFFF);
-
-  // Convert dst from premul to nonpremul.
-  if (da != 0) {
-    dr = (dr * 0xFFFF) / da;
-    dg = (dg * 0xFFFF) / da;
-    db = (db * 0xFFFF) / da;
-  }
-
-  // Convert from 16-bit color to 8-bit color and combine the components.
-  da >>= 8;
-  dr >>= 8;
-  dg >>= 8;
-  db >>= 8;
-  return (db << 0) | (dg << 8) | (dr << 16) | (da << 24);
-}
-
-static inline uint32_t  //
-wuffs_base__composite_premul_nonpremul_u32_axxx(uint32_t dst_premul,
-                                                uint32_t src_nonpremul) {
-  // Convert from 8-bit color to 16-bit color.
-  uint32_t sa = 0x101 * (0xFF & (src_nonpremul >> 24));
-  uint32_t sr = 0x101 * (0xFF & (src_nonpremul >> 16));
-  uint32_t sg = 0x101 * (0xFF & (src_nonpremul >> 8));
-  uint32_t sb = 0x101 * (0xFF & (src_nonpremul >> 0));
-  uint32_t da = 0x101 * (0xFF & (dst_premul >> 24));
-  uint32_t dr = 0x101 * (0xFF & (dst_premul >> 16));
-  uint32_t dg = 0x101 * (0xFF & (dst_premul >> 8));
-  uint32_t db = 0x101 * (0xFF & (dst_premul >> 0));
-
-  // Calculate the inverse of the src-alpha: how much of the dst to keep.
-  uint32_t ia = 0xFFFF - sa;
-
-  // Composite src (nonpremul) over dst (premul).
-  da = sa + ((da * ia) / 0xFFFF);
-  dr = ((sr * sa) + (dr * ia)) / 0xFFFF;
-  dg = ((sg * sa) + (dg * ia)) / 0xFFFF;
-  db = ((sb * sa) + (db * ia)) / 0xFFFF;
-
-  // Convert from 16-bit color to 8-bit color and combine the components.
-  da >>= 8;
-  dr >>= 8;
-  dg >>= 8;
-  db >>= 8;
-  return (db << 0) | (dg << 8) | (dr << 16) | (da << 24);
-}
-
-static inline uint32_t  //
-wuffs_base__composite_premul_premul_u32_axxx(uint32_t dst_premul,
-                                             uint32_t src_premul) {
-  // Convert from 8-bit color to 16-bit color.
-  uint32_t sa = 0x101 * (0xFF & (src_premul >> 24));
-  uint32_t sr = 0x101 * (0xFF & (src_premul >> 16));
-  uint32_t sg = 0x101 * (0xFF & (src_premul >> 8));
-  uint32_t sb = 0x101 * (0xFF & (src_premul >> 0));
-  uint32_t da = 0x101 * (0xFF & (dst_premul >> 24));
-  uint32_t dr = 0x101 * (0xFF & (dst_premul >> 16));
-  uint32_t dg = 0x101 * (0xFF & (dst_premul >> 8));
-  uint32_t db = 0x101 * (0xFF & (dst_premul >> 0));
-
-  // Calculate the inverse of the src-alpha: how much of the dst to keep.
-  uint32_t ia = 0xFFFF - sa;
-
-  // Composite src (premul) over dst (premul).
-  da = sa + ((da * ia) / 0xFFFF);
-  dr = sr + ((dr * ia) / 0xFFFF);
-  dg = sg + ((dg * ia) / 0xFFFF);
-  db = sb + ((db * ia) / 0xFFFF);
-
-  // Convert from 16-bit color to 8-bit color and combine the components.
-  da >>= 8;
-  dr >>= 8;
-  dg >>= 8;
-  db >>= 8;
-  return (db << 0) | (dg << 8) | (dr << 16) | (da << 24);
-}
-
-// --------
-
 wuffs_base__color_u32_argb_premul  //
 wuffs_base__pixel_buffer__color_u32_at(const wuffs_base__pixel_buffer* pb,
                                        uint32_t x,
@@ -8956,988 +8765,6 @@
   return (uint8_t)best_index;
 }
 
-// --------
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__copy_1_1(wuffs_base__slice_u8 dst,
-                                     wuffs_base__slice_u8 dst_palette,
-                                     wuffs_base__slice_u8 src) {
-  return wuffs_base__slice_u8__copy_from_slice(dst, src);
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__copy_3_3(wuffs_base__slice_u8 dst,
-                                     wuffs_base__slice_u8 dst_palette,
-                                     wuffs_base__slice_u8 src) {
-  size_t dst_len3 = dst.len / 3;
-  size_t src_len3 = src.len / 3;
-  size_t len = dst_len3 < src_len3 ? dst_len3 : src_len3;
-  if (len > 0) {
-    memmove(dst.ptr, src.ptr, len * 3);
-  }
-  return len;
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__copy_4_4(wuffs_base__slice_u8 dst,
-                                     wuffs_base__slice_u8 dst_palette,
-                                     wuffs_base__slice_u8 src) {
-  size_t dst_len4 = dst.len / 4;
-  size_t src_len4 = src.len / 4;
-  size_t len = dst_len4 < src_len4 ? dst_len4 : src_len4;
-  if (len > 0) {
-    memmove(dst.ptr, src.ptr, len * 4);
-  }
-  return len;
-}
-
-// --------
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__bgr_565__bgr(wuffs_base__slice_u8 dst,
-                                         wuffs_base__slice_u8 dst_palette,
-                                         wuffs_base__slice_u8 src) {
-  size_t dst_len2 = dst.len / 2;
-  size_t src_len3 = src.len / 3;
-  size_t len = dst_len2 < src_len3 ? dst_len2 : src_len3;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  // TODO: unroll.
-
-  while (n >= 1) {
-    uint32_t b5 = s[0] >> 3;
-    uint32_t g6 = s[1] >> 2;
-    uint32_t r5 = s[2] >> 3;
-    uint32_t rgb_565 = (r5 << 11) | (g6 << 5) | (b5 << 0);
-    wuffs_base__store_u16le__no_bounds_check(d + (0 * 2), (uint16_t)rgb_565);
-
-    s += 1 * 3;
-    d += 1 * 2;
-    n -= 1;
-  }
-
-  return len;
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src(
-    wuffs_base__slice_u8 dst,
-    wuffs_base__slice_u8 dst_palette,
-    wuffs_base__slice_u8 src) {
-  size_t dst_len2 = dst.len / 2;
-  size_t src_len4 = src.len / 4;
-  size_t len = dst_len2 < src_len4 ? dst_len2 : src_len4;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  // TODO: unroll.
-
-  while (n >= 1) {
-    wuffs_base__store_u16le__no_bounds_check(
-        d + (0 * 2),
-        wuffs_base__color_u32_argb_premul__as__color_u16_rgb_565(
-            wuffs_base__color_u32_argb_nonpremul__as__color_u32_argb_premul(
-                wuffs_base__load_u32le__no_bounds_check(s + (0 * 4)))));
-
-    s += 1 * 4;
-    d += 1 * 2;
-    n -= 1;
-  }
-
-  return len;
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src_over(
-    wuffs_base__slice_u8 dst,
-    wuffs_base__slice_u8 dst_palette,
-    wuffs_base__slice_u8 src) {
-  size_t dst_len2 = dst.len / 2;
-  size_t src_len4 = src.len / 4;
-  size_t len = dst_len2 < src_len4 ? dst_len2 : src_len4;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  // TODO: unroll.
-
-  while (n >= 1) {
-    // Convert from 8-bit color to 16-bit color.
-    uint32_t sa = 0x101 * ((uint32_t)s[3]);
-    uint32_t sr = 0x101 * ((uint32_t)s[2]);
-    uint32_t sg = 0x101 * ((uint32_t)s[1]);
-    uint32_t sb = 0x101 * ((uint32_t)s[0]);
-
-    // Convert from 565 color to 16-bit color.
-    uint32_t old_rgb_565 = wuffs_base__load_u16le__no_bounds_check(d + (0 * 2));
-    uint32_t old_r5 = 0x1F & (old_rgb_565 >> 11);
-    uint32_t dr = (0x8421 * old_r5) >> 4;
-    uint32_t old_g6 = 0x3F & (old_rgb_565 >> 5);
-    uint32_t dg = (0x1041 * old_g6) >> 2;
-    uint32_t old_b5 = 0x1F & (old_rgb_565 >> 0);
-    uint32_t db = (0x8421 * old_b5) >> 4;
-
-    // Calculate the inverse of the src-alpha: how much of the dst to keep.
-    uint32_t ia = 0xFFFF - sa;
-
-    // Composite src (nonpremul) over dst (premul).
-    dr = ((sr * sa) + (dr * ia)) / 0xFFFF;
-    dg = ((sg * sa) + (dg * ia)) / 0xFFFF;
-    db = ((sb * sa) + (db * ia)) / 0xFFFF;
-
-    // Convert from 16-bit color to 565 color and combine the components.
-    uint32_t new_r5 = 0x1F & (dr >> 11);
-    uint32_t new_g6 = 0x3F & (dg >> 10);
-    uint32_t new_b5 = 0x1F & (db >> 11);
-    uint32_t new_rgb_565 = (new_r5 << 11) | (new_g6 << 5) | (new_b5 << 0);
-    wuffs_base__store_u16le__no_bounds_check(d + (0 * 2),
-                                             (uint16_t)new_rgb_565);
-
-    s += 1 * 4;
-    d += 1 * 2;
-    n -= 1;
-  }
-
-  return len;
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__bgr_565__y(wuffs_base__slice_u8 dst,
-                                       wuffs_base__slice_u8 dst_palette,
-                                       wuffs_base__slice_u8 src) {
-  size_t dst_len2 = dst.len / 2;
-  size_t len = dst_len2 < src.len ? dst_len2 : src.len;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  // TODO: unroll.
-
-  while (n >= 1) {
-    uint32_t y5 = s[0] >> 3;
-    uint32_t y6 = s[0] >> 2;
-    uint32_t rgb_565 = (y5 << 11) | (y6 << 5) | (y5 << 0);
-    wuffs_base__store_u16le__no_bounds_check(d + (0 * 2), (uint16_t)rgb_565);
-
-    s += 1 * 1;
-    d += 1 * 2;
-    n -= 1;
-  }
-
-  return len;
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__bgr_565__index__src(
-    wuffs_base__slice_u8 dst,
-    wuffs_base__slice_u8 dst_palette,
-    wuffs_base__slice_u8 src) {
-  if (dst_palette.len != 1024) {
-    return 0;
-  }
-  size_t dst_len2 = dst.len / 2;
-  size_t len = dst_len2 < src.len ? dst_len2 : src.len;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  const size_t loop_unroll_count = 4;
-
-  while (n >= loop_unroll_count) {
-    wuffs_base__store_u16le__no_bounds_check(
-        d + (0 * 2), wuffs_base__load_u16le__no_bounds_check(
-                         dst_palette.ptr + ((size_t)s[0] * 4)));
-    wuffs_base__store_u16le__no_bounds_check(
-        d + (1 * 2), wuffs_base__load_u16le__no_bounds_check(
-                         dst_palette.ptr + ((size_t)s[1] * 4)));
-    wuffs_base__store_u16le__no_bounds_check(
-        d + (2 * 2), wuffs_base__load_u16le__no_bounds_check(
-                         dst_palette.ptr + ((size_t)s[2] * 4)));
-    wuffs_base__store_u16le__no_bounds_check(
-        d + (3 * 2), wuffs_base__load_u16le__no_bounds_check(
-                         dst_palette.ptr + ((size_t)s[3] * 4)));
-
-    s += loop_unroll_count * 1;
-    d += loop_unroll_count * 2;
-    n -= loop_unroll_count;
-  }
-
-  while (n >= 1) {
-    wuffs_base__store_u16le__no_bounds_check(
-        d + (0 * 2), wuffs_base__load_u16le__no_bounds_check(
-                         dst_palette.ptr + ((size_t)s[0] * 4)));
-
-    s += 1 * 1;
-    d += 1 * 2;
-    n -= 1;
-  }
-
-  return len;
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__bgr_565__index_binary_alpha__src_over(
-    wuffs_base__slice_u8 dst,
-    wuffs_base__slice_u8 dst_palette,
-    wuffs_base__slice_u8 src) {
-  if (dst_palette.len != 1024) {
-    return 0;
-  }
-  size_t dst_len2 = dst.len / 2;
-  size_t len = dst_len2 < src.len ? dst_len2 : src.len;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  // TODO: unroll.
-
-  while (n >= 1) {
-    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
-                                                          ((size_t)s[0] * 4));
-    if (s0) {
-      wuffs_base__store_u16le__no_bounds_check(d + (0 * 2), (uint16_t)s0);
-    }
-
-    s += 1 * 1;
-    d += 1 * 2;
-    n -= 1;
-  }
-
-  return len;
-}
-
-// --------
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src(
-    wuffs_base__slice_u8 dst,
-    wuffs_base__slice_u8 dst_palette,
-    wuffs_base__slice_u8 src) {
-  size_t dst_len3 = dst.len / 3;
-  size_t src_len4 = src.len / 4;
-  size_t len = dst_len3 < src_len4 ? dst_len3 : src_len4;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  // TODO: unroll.
-
-  while (n >= 1) {
-    uint32_t s0 =
-        wuffs_base__color_u32_argb_nonpremul__as__color_u32_argb_premul(
-            wuffs_base__load_u32le__no_bounds_check(s + (0 * 4)));
-    wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);
-
-    s += 1 * 4;
-    d += 1 * 3;
-    n -= 1;
-  }
-
-  return len;
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src_over(
-    wuffs_base__slice_u8 dst,
-    wuffs_base__slice_u8 dst_palette,
-    wuffs_base__slice_u8 src) {
-  size_t dst_len3 = dst.len / 3;
-  size_t src_len4 = src.len / 4;
-  size_t len = dst_len3 < src_len4 ? dst_len3 : src_len4;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  // TODO: unroll.
-
-  while (n >= 1) {
-    // Convert from 8-bit color to 16-bit color.
-    uint32_t sa = 0x101 * ((uint32_t)s[3]);
-    uint32_t sr = 0x101 * ((uint32_t)s[2]);
-    uint32_t sg = 0x101 * ((uint32_t)s[1]);
-    uint32_t sb = 0x101 * ((uint32_t)s[0]);
-    uint32_t dr = 0x101 * ((uint32_t)d[2]);
-    uint32_t dg = 0x101 * ((uint32_t)d[1]);
-    uint32_t db = 0x101 * ((uint32_t)d[0]);
-
-    // Calculate the inverse of the src-alpha: how much of the dst to keep.
-    uint32_t ia = 0xFFFF - sa;
-
-    // Composite src (nonpremul) over dst (premul).
-    dr = ((sr * sa) + (dr * ia)) / 0xFFFF;
-    dg = ((sg * sa) + (dg * ia)) / 0xFFFF;
-    db = ((sb * sa) + (db * ia)) / 0xFFFF;
-
-    // Convert from 16-bit color to 8-bit color.
-    d[0] = (uint8_t)(db >> 8);
-    d[1] = (uint8_t)(dg >> 8);
-    d[2] = (uint8_t)(dr >> 8);
-
-    s += 1 * 4;
-    d += 1 * 3;
-    n -= 1;
-  }
-
-  return len;
-}
-
-// --------
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul__src_over(
-    wuffs_base__slice_u8 dst,
-    wuffs_base__slice_u8 dst_palette,
-    wuffs_base__slice_u8 src) {
-  size_t dst_len4 = dst.len / 4;
-  size_t src_len4 = src.len / 4;
-  size_t len = dst_len4 < src_len4 ? dst_len4 : src_len4;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  // TODO: unroll.
-
-  while (n >= 1) {
-    uint32_t d0 = wuffs_base__load_u32le__no_bounds_check(d + (0 * 4));
-    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(s + (0 * 4));
-    wuffs_base__store_u32le__no_bounds_check(
-        d + (0 * 4),
-        wuffs_base__composite_nonpremul_nonpremul_u32_axxx(d0, s0));
-
-    s += 1 * 4;
-    d += 1 * 4;
-    n -= 1;
-  }
-
-  return len;
-}
-
-// --------
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src(
-    wuffs_base__slice_u8 dst,
-    wuffs_base__slice_u8 dst_palette,
-    wuffs_base__slice_u8 src) {
-  size_t dst_len4 = dst.len / 4;
-  size_t src_len4 = src.len / 4;
-  size_t len = dst_len4 < src_len4 ? dst_len4 : src_len4;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  // TODO: unroll.
-
-  while (n >= 1) {
-    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(s + (0 * 4));
-    wuffs_base__store_u32le__no_bounds_check(
-        d + (0 * 4),
-        wuffs_base__color_u32_argb_nonpremul__as__color_u32_argb_premul(s0));
-
-    s += 1 * 4;
-    d += 1 * 4;
-    n -= 1;
-  }
-
-  return len;
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src_over(
-    wuffs_base__slice_u8 dst,
-    wuffs_base__slice_u8 dst_palette,
-    wuffs_base__slice_u8 src) {
-  size_t dst_len4 = dst.len / 4;
-  size_t src_len4 = src.len / 4;
-  size_t len = dst_len4 < src_len4 ? dst_len4 : src_len4;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  // TODO: unroll.
-
-  while (n >= 1) {
-    uint32_t d0 = wuffs_base__load_u32le__no_bounds_check(d + (0 * 4));
-    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(s + (0 * 4));
-    wuffs_base__store_u32le__no_bounds_check(
-        d + (0 * 4), wuffs_base__composite_premul_nonpremul_u32_axxx(d0, s0));
-
-    s += 1 * 4;
-    d += 1 * 4;
-    n -= 1;
-  }
-
-  return len;
-}
-
-// --------
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__xxx__index__src(wuffs_base__slice_u8 dst,
-                                            wuffs_base__slice_u8 dst_palette,
-                                            wuffs_base__slice_u8 src) {
-  if (dst_palette.len != 1024) {
-    return 0;
-  }
-  size_t dst_len3 = dst.len / 3;
-  size_t len = dst_len3 < src.len ? dst_len3 : src.len;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  const size_t loop_unroll_count = 4;
-
-  // The comparison in the while condition is ">", not ">=", because with
-  // ">=", the last 4-byte store could write past the end of the dst slice.
-  //
-  // Each 4-byte store writes one too many bytes, but a subsequent store
-  // will overwrite that with the correct byte. There is always another
-  // store, whether a 4-byte store in this loop or a 1-byte store in the
-  // next loop.
-  while (n > loop_unroll_count) {
-    wuffs_base__store_u32le__no_bounds_check(
-        d + (0 * 3), wuffs_base__load_u32le__no_bounds_check(
-                         dst_palette.ptr + ((size_t)s[0] * 4)));
-    wuffs_base__store_u32le__no_bounds_check(
-        d + (1 * 3), wuffs_base__load_u32le__no_bounds_check(
-                         dst_palette.ptr + ((size_t)s[1] * 4)));
-    wuffs_base__store_u32le__no_bounds_check(
-        d + (2 * 3), wuffs_base__load_u32le__no_bounds_check(
-                         dst_palette.ptr + ((size_t)s[2] * 4)));
-    wuffs_base__store_u32le__no_bounds_check(
-        d + (3 * 3), wuffs_base__load_u32le__no_bounds_check(
-                         dst_palette.ptr + ((size_t)s[3] * 4)));
-
-    s += loop_unroll_count * 1;
-    d += loop_unroll_count * 3;
-    n -= loop_unroll_count;
-  }
-
-  while (n >= 1) {
-    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
-                                                          ((size_t)s[0] * 4));
-    wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);
-
-    s += 1 * 1;
-    d += 1 * 3;
-    n -= 1;
-  }
-
-  return len;
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__xxx__index_binary_alpha__src_over(
-    wuffs_base__slice_u8 dst,
-    wuffs_base__slice_u8 dst_palette,
-    wuffs_base__slice_u8 src) {
-  if (dst_palette.len != 1024) {
-    return 0;
-  }
-  size_t dst_len3 = dst.len / 3;
-  size_t len = dst_len3 < src.len ? dst_len3 : src.len;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  const size_t loop_unroll_count = 4;
-
-  while (n >= loop_unroll_count) {
-    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
-                                                          ((size_t)s[0] * 4));
-    if (s0) {
-      wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);
-    }
-    uint32_t s1 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
-                                                          ((size_t)s[1] * 4));
-    if (s1) {
-      wuffs_base__store_u24le__no_bounds_check(d + (1 * 3), s1);
-    }
-    uint32_t s2 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
-                                                          ((size_t)s[2] * 4));
-    if (s2) {
-      wuffs_base__store_u24le__no_bounds_check(d + (2 * 3), s2);
-    }
-    uint32_t s3 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
-                                                          ((size_t)s[3] * 4));
-    if (s3) {
-      wuffs_base__store_u24le__no_bounds_check(d + (3 * 3), s3);
-    }
-
-    s += loop_unroll_count * 1;
-    d += loop_unroll_count * 3;
-    n -= loop_unroll_count;
-  }
-
-  while (n >= 1) {
-    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
-                                                          ((size_t)s[0] * 4));
-    if (s0) {
-      wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);
-    }
-
-    s += 1 * 1;
-    d += 1 * 3;
-    n -= 1;
-  }
-
-  return len;
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__xxx__y(wuffs_base__slice_u8 dst,
-                                   wuffs_base__slice_u8 dst_palette,
-                                   wuffs_base__slice_u8 src) {
-  size_t dst_len3 = dst.len / 3;
-  size_t len = dst_len3 < src.len ? dst_len3 : src.len;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  // TODO: unroll.
-
-  while (n >= 1) {
-    uint8_t s0 = s[0];
-    d[0] = s0;
-    d[1] = s0;
-    d[2] = s0;
-
-    s += 1 * 1;
-    d += 1 * 3;
-    n -= 1;
-  }
-
-  return len;
-}
-
-// --------
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__xxxx__index__src(wuffs_base__slice_u8 dst,
-                                             wuffs_base__slice_u8 dst_palette,
-                                             wuffs_base__slice_u8 src) {
-  if (dst_palette.len != 1024) {
-    return 0;
-  }
-  size_t dst_len4 = dst.len / 4;
-  size_t len = dst_len4 < src.len ? dst_len4 : src.len;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  const size_t loop_unroll_count = 4;
-
-  while (n >= loop_unroll_count) {
-    wuffs_base__store_u32le__no_bounds_check(
-        d + (0 * 4), wuffs_base__load_u32le__no_bounds_check(
-                         dst_palette.ptr + ((size_t)s[0] * 4)));
-    wuffs_base__store_u32le__no_bounds_check(
-        d + (1 * 4), wuffs_base__load_u32le__no_bounds_check(
-                         dst_palette.ptr + ((size_t)s[1] * 4)));
-    wuffs_base__store_u32le__no_bounds_check(
-        d + (2 * 4), wuffs_base__load_u32le__no_bounds_check(
-                         dst_palette.ptr + ((size_t)s[2] * 4)));
-    wuffs_base__store_u32le__no_bounds_check(
-        d + (3 * 4), wuffs_base__load_u32le__no_bounds_check(
-                         dst_palette.ptr + ((size_t)s[3] * 4)));
-
-    s += loop_unroll_count * 1;
-    d += loop_unroll_count * 4;
-    n -= loop_unroll_count;
-  }
-
-  while (n >= 1) {
-    wuffs_base__store_u32le__no_bounds_check(
-        d + (0 * 4), wuffs_base__load_u32le__no_bounds_check(
-                         dst_palette.ptr + ((size_t)s[0] * 4)));
-
-    s += 1 * 1;
-    d += 1 * 4;
-    n -= 1;
-  }
-
-  return len;
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over(
-    wuffs_base__slice_u8 dst,
-    wuffs_base__slice_u8 dst_palette,
-    wuffs_base__slice_u8 src) {
-  if (dst_palette.len != 1024) {
-    return 0;
-  }
-  size_t dst_len4 = dst.len / 4;
-  size_t len = dst_len4 < src.len ? dst_len4 : src.len;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  const size_t loop_unroll_count = 4;
-
-  while (n >= loop_unroll_count) {
-    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
-                                                          ((size_t)s[0] * 4));
-    if (s0) {
-      wuffs_base__store_u32le__no_bounds_check(d + (0 * 4), s0);
-    }
-    uint32_t s1 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
-                                                          ((size_t)s[1] * 4));
-    if (s1) {
-      wuffs_base__store_u32le__no_bounds_check(d + (1 * 4), s1);
-    }
-    uint32_t s2 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
-                                                          ((size_t)s[2] * 4));
-    if (s2) {
-      wuffs_base__store_u32le__no_bounds_check(d + (2 * 4), s2);
-    }
-    uint32_t s3 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
-                                                          ((size_t)s[3] * 4));
-    if (s3) {
-      wuffs_base__store_u32le__no_bounds_check(d + (3 * 4), s3);
-    }
-
-    s += loop_unroll_count * 1;
-    d += loop_unroll_count * 4;
-    n -= loop_unroll_count;
-  }
-
-  while (n >= 1) {
-    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
-                                                          ((size_t)s[0] * 4));
-    if (s0) {
-      wuffs_base__store_u32le__no_bounds_check(d + (0 * 4), s0);
-    }
-
-    s += 1 * 1;
-    d += 1 * 4;
-    n -= 1;
-  }
-
-  return len;
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__xxxx__xxx(wuffs_base__slice_u8 dst,
-                                      wuffs_base__slice_u8 dst_palette,
-                                      wuffs_base__slice_u8 src) {
-  size_t dst_len4 = dst.len / 4;
-  size_t src_len3 = src.len / 3;
-  size_t len = dst_len4 < src_len3 ? dst_len4 : src_len3;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  // TODO: unroll.
-
-  while (n >= 1) {
-    wuffs_base__store_u32le__no_bounds_check(
-        d + (0 * 4),
-        0xFF000000 | wuffs_base__load_u24le__no_bounds_check(s + (0 * 3)));
-
-    s += 1 * 3;
-    d += 1 * 4;
-    n -= 1;
-  }
-
-  return len;
-}
-
-static uint64_t  //
-wuffs_base__pixel_swizzler__xxxx__y(wuffs_base__slice_u8 dst,
-                                    wuffs_base__slice_u8 dst_palette,
-                                    wuffs_base__slice_u8 src) {
-  size_t dst_len4 = dst.len / 4;
-  size_t len = dst_len4 < src.len ? dst_len4 : src.len;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
-
-  // TODO: unroll.
-
-  while (n >= 1) {
-    wuffs_base__store_u32le__no_bounds_check(
-        d + (0 * 4), 0xFF000000 | (0x010101 * (uint32_t)s[0]));
-
-    s += 1 * 1;
-    d += 1 * 4;
-    n -= 1;
-  }
-
-  return len;
-}
-
-// --------
-
-static wuffs_base__pixel_swizzler__func  //
-wuffs_base__pixel_swizzler__prepare__y(wuffs_base__pixel_swizzler* p,
-                                       wuffs_base__pixel_format dst_format,
-                                       wuffs_base__slice_u8 dst_palette,
-                                       wuffs_base__slice_u8 src_palette,
-                                       wuffs_base__pixel_blend blend) {
-  switch (dst_format.repr) {
-    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
-      return wuffs_base__pixel_swizzler__bgr_565__y;
-
-    case WUFFS_BASE__PIXEL_FORMAT__BGR:
-    case WUFFS_BASE__PIXEL_FORMAT__RGB:
-      return wuffs_base__pixel_swizzler__xxx__y;
-
-    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:
-    case WUFFS_BASE__PIXEL_FORMAT__BGRX:
-    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:
-    case WUFFS_BASE__PIXEL_FORMAT__RGBX:
-      return wuffs_base__pixel_swizzler__xxxx__y;
-  }
-  return NULL;
-}
-
-static wuffs_base__pixel_swizzler__func  //
-wuffs_base__pixel_swizzler__prepare__indexed__bgra_binary(
-    wuffs_base__pixel_swizzler* p,
-    wuffs_base__pixel_format dst_format,
-    wuffs_base__slice_u8 dst_palette,
-    wuffs_base__slice_u8 src_palette,
-    wuffs_base__pixel_blend blend) {
-  switch (dst_format.repr) {
-    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_NONPREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_PREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY:
-      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=
-          1024) {
-        return NULL;
-      }
-      switch (blend) {
-        case WUFFS_BASE__PIXEL_BLEND__SRC:
-          return wuffs_base__pixel_swizzler__copy_1_1;
-      }
-      return NULL;
-
-    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
-      if (wuffs_base__pixel_swizzler__squash_bgr_565_888(dst_palette,
-                                                         src_palette) != 1024) {
-        return NULL;
-      }
-      switch (blend) {
-        case WUFFS_BASE__PIXEL_BLEND__SRC:
-          return wuffs_base__pixel_swizzler__bgr_565__index__src;
-        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
-          return wuffs_base__pixel_swizzler__bgr_565__index_binary_alpha__src_over;
-      }
-      return NULL;
-
-    case WUFFS_BASE__PIXEL_FORMAT__BGR:
-      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=
-          1024) {
-        return NULL;
-      }
-      switch (blend) {
-        case WUFFS_BASE__PIXEL_BLEND__SRC:
-          return wuffs_base__pixel_swizzler__xxx__index__src;
-        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
-          return wuffs_base__pixel_swizzler__xxx__index_binary_alpha__src_over;
-      }
-      return NULL;
-
-    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:
-      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=
-          1024) {
-        return NULL;
-      }
-      switch (blend) {
-        case WUFFS_BASE__PIXEL_BLEND__SRC:
-          return wuffs_base__pixel_swizzler__xxxx__index__src;
-        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
-          return wuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over;
-      }
-      return NULL;
-
-    case WUFFS_BASE__PIXEL_FORMAT__RGB:
-      if (wuffs_base__pixel_swizzler__swap_rgbx_bgrx(dst_palette,
-                                                     src_palette) != 1024) {
-        return NULL;
-      }
-      switch (blend) {
-        case WUFFS_BASE__PIXEL_BLEND__SRC:
-          return wuffs_base__pixel_swizzler__xxx__index__src;
-        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
-          return wuffs_base__pixel_swizzler__xxx__index_binary_alpha__src_over;
-      }
-      return NULL;
-
-    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:
-      if (wuffs_base__pixel_swizzler__swap_rgbx_bgrx(dst_palette,
-                                                     src_palette) != 1024) {
-        return NULL;
-      }
-      switch (blend) {
-        case WUFFS_BASE__PIXEL_BLEND__SRC:
-          return wuffs_base__pixel_swizzler__xxxx__index__src;
-        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
-          return wuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over;
-      }
-      return NULL;
-  }
-  return NULL;
-}
-
-static wuffs_base__pixel_swizzler__func  //
-wuffs_base__pixel_swizzler__prepare__bgr(wuffs_base__pixel_swizzler* p,
-                                         wuffs_base__pixel_format dst_format,
-                                         wuffs_base__slice_u8 dst_palette,
-                                         wuffs_base__slice_u8 src_palette,
-                                         wuffs_base__pixel_blend blend) {
-  switch (dst_format.repr) {
-    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
-      return wuffs_base__pixel_swizzler__bgr_565__bgr;
-
-    case WUFFS_BASE__PIXEL_FORMAT__BGR:
-      return wuffs_base__pixel_swizzler__copy_3_3;
-
-    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:
-    case WUFFS_BASE__PIXEL_FORMAT__BGRX:
-      return wuffs_base__pixel_swizzler__xxxx__xxx;
-
-    case WUFFS_BASE__PIXEL_FORMAT__RGB:
-    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:
-    case WUFFS_BASE__PIXEL_FORMAT__RGBX:
-      // TODO.
-      break;
-  }
-  return NULL;
-}
-
-static wuffs_base__pixel_swizzler__func  //
-wuffs_base__pixel_swizzler__prepare__bgra_nonpremul(
-    wuffs_base__pixel_swizzler* p,
-    wuffs_base__pixel_format dst_format,
-    wuffs_base__slice_u8 dst_palette,
-    wuffs_base__slice_u8 src_palette,
-    wuffs_base__pixel_blend blend) {
-  switch (dst_format.repr) {
-    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
-      switch (blend) {
-        case WUFFS_BASE__PIXEL_BLEND__SRC:
-          return wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src;
-        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
-          return wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src_over;
-      }
-      return NULL;
-
-    case WUFFS_BASE__PIXEL_FORMAT__BGR:
-      switch (blend) {
-        case WUFFS_BASE__PIXEL_BLEND__SRC:
-          return wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src;
-        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
-          return wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src_over;
-      }
-      return NULL;
-
-    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
-      switch (blend) {
-        case WUFFS_BASE__PIXEL_BLEND__SRC:
-          return wuffs_base__pixel_swizzler__copy_4_4;
-        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
-          return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul__src_over;
-      }
-      return NULL;
-
-    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
-      switch (blend) {
-        case WUFFS_BASE__PIXEL_BLEND__SRC:
-          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src;
-        case WUFFS_BASE__PIXEL_BLEND__SRC_OVER:
-          return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src_over;
-      }
-      return NULL;
-
-    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:
-    case WUFFS_BASE__PIXEL_FORMAT__BGRX:
-      // TODO.
-      break;
-
-    case WUFFS_BASE__PIXEL_FORMAT__RGB:
-    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:
-    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:
-    case WUFFS_BASE__PIXEL_FORMAT__RGBX:
-      // TODO.
-      break;
-  }
-  return NULL;
-}
-
-// --------
-
-wuffs_base__status  //
-wuffs_base__pixel_swizzler__prepare(wuffs_base__pixel_swizzler* p,
-                                    wuffs_base__pixel_format dst_format,
-                                    wuffs_base__slice_u8 dst_palette,
-                                    wuffs_base__pixel_format src_format,
-                                    wuffs_base__slice_u8 src_palette,
-                                    wuffs_base__pixel_blend blend) {
-  if (!p) {
-    return wuffs_base__make_status(wuffs_base__error__bad_receiver);
-  }
-
-  // TODO: support many more formats.
-
-  wuffs_base__pixel_swizzler__func func = NULL;
-
-  switch (src_format.repr) {
-    case WUFFS_BASE__PIXEL_FORMAT__Y:
-      func = wuffs_base__pixel_swizzler__prepare__y(p, dst_format, dst_palette,
-                                                    src_palette, blend);
-      break;
-
-    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY:
-      func = wuffs_base__pixel_swizzler__prepare__indexed__bgra_binary(
-          p, dst_format, dst_palette, src_palette, blend);
-      break;
-
-    case WUFFS_BASE__PIXEL_FORMAT__BGR:
-      func = wuffs_base__pixel_swizzler__prepare__bgr(
-          p, dst_format, dst_palette, src_palette, blend);
-      break;
-
-    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
-      func = wuffs_base__pixel_swizzler__prepare__bgra_nonpremul(
-          p, dst_format, dst_palette, src_palette, blend);
-      break;
-  }
-
-  p->private_impl.func = func;
-  return wuffs_base__make_status(
-      func ? NULL : wuffs_base__error__unsupported_pixel_swizzler_option);
-}
-
-uint64_t  //
-wuffs_base__pixel_swizzler__swizzle_interleaved(
-    const wuffs_base__pixel_swizzler* p,
-    wuffs_base__slice_u8 dst,
-    wuffs_base__slice_u8 dst_palette,
-    wuffs_base__slice_u8 src) {
-  if (p && p->private_impl.func) {
-    return (*p->private_impl.func)(dst, dst_palette, src);
-  }
-  return 0;
-}
-
 // ---------------- String Conversions
 
 // wuffs_base__parse_number__foo_digits entries are 0x00 for invalid digits,
@@ -10223,6 +9050,233 @@
   } while (0);
 }
 
+// ---------------- Hexadecimal
+
+size_t  // Returns the number of dst bytes written.
+wuffs_base__hexadecimal__decode2(wuffs_base__slice_u8 dst,
+                                 wuffs_base__slice_u8 src) {
+  size_t src_len2 = src.len / 2;  // Each dst byte consumes 2 src bytes.
+  size_t len = dst.len < src_len2 ? dst.len : src_len2;
+  uint8_t* d = dst.ptr;
+  uint8_t* s = src.ptr;
+  size_t n = len;
+
+  while (n--) {  // s[0] supplies the high nibble, s[1] the low nibble.
+    *d = (uint8_t)((wuffs_base__parse_number__hexadecimal_digits[s[0]] << 4) |
+                   (wuffs_base__parse_number__hexadecimal_digits[s[1]] & 0x0F));
+    d += 1;
+    s += 2;
+  }
+
+  return len;
+}
+
+size_t  // Returns the number of dst bytes written.
+wuffs_base__hexadecimal__decode4(wuffs_base__slice_u8 dst,
+                                 wuffs_base__slice_u8 src) {
+  size_t src_len4 = src.len / 4;  // Each dst byte consumes 4 src bytes.
+  size_t len = dst.len < src_len4 ? dst.len : src_len4;
+  uint8_t* d = dst.ptr;
+  uint8_t* s = src.ptr;
+  size_t n = len;
+
+  while (n--) {  // Only s[2] and s[3] are read; s[0] and s[1] are skipped.
+    *d = (uint8_t)((wuffs_base__parse_number__hexadecimal_digits[s[2]] << 4) |
+                   (wuffs_base__parse_number__hexadecimal_digits[s[3]] & 0x0F));
+    d += 1;
+    s += 4;
+  }
+
+  return len;
+}
+
+// ---------------- Unicode and UTF-8
+
+size_t  // Returns the number of bytes written to dst, or 0 on failure.
+wuffs_base__utf_8__encode(wuffs_base__slice_u8 dst, uint32_t code_point) {
+  if (code_point <= 0x7F) {  // 1-byte form.
+    if (dst.len >= 1) {
+      dst.ptr[0] = (uint8_t)(code_point);
+      return 1;
+    }
+
+  } else if (code_point <= 0x07FF) {  // 2-byte form.
+    if (dst.len >= 2) {
+      dst.ptr[0] = (uint8_t)(0xC0 | ((code_point >> 6)));
+      dst.ptr[1] = (uint8_t)(0x80 | ((code_point >> 0) & 0x3F));
+      return 2;
+    }
+
+  } else if (code_point <= 0xFFFF) {  // 3 bytes; 0xD800 ..= 0xDFFF rejected.
+    if ((dst.len >= 3) && ((code_point < 0xD800) || (0xDFFF < code_point))) {
+      dst.ptr[0] = (uint8_t)(0xE0 | ((code_point >> 12)));
+      dst.ptr[1] = (uint8_t)(0x80 | ((code_point >> 6) & 0x3F));
+      dst.ptr[2] = (uint8_t)(0x80 | ((code_point >> 0) & 0x3F));
+      return 3;
+    }
+
+  } else if (code_point <= 0x10FFFF) {  // 4-byte form.
+    if (dst.len >= 4) {
+      dst.ptr[0] = (uint8_t)(0xF0 | ((code_point >> 18)));
+      dst.ptr[1] = (uint8_t)(0x80 | ((code_point >> 12) & 0x3F));
+      dst.ptr[2] = (uint8_t)(0x80 | ((code_point >> 6) & 0x3F));
+      dst.ptr[3] = (uint8_t)(0x80 | ((code_point >> 0) & 0x3F));
+      return 4;
+    }
+  }
+
+  return 0;  // dst too short, a surrogate, or code_point > 0x10FFFF.
+}
+
+// wuffs_base__utf_8__byte_length_minus_1 is the byte length (minus 1) of a
+// UTF-8 encoded code point, based on the encoding's initial byte.
+//  - 0x00 is 1-byte UTF-8 (ASCII).
+//  - 0x01 is the start of 2-byte UTF-8.
+//  - 0x02 is the start of 3-byte UTF-8.
+//  - 0x03 is the start of 4-byte UTF-8.
+//  - 0x40 is a UTF-8 tail byte.
+//  - 0x80 is invalid UTF-8 (0xC0, 0xC1 and 0xF5 ..= 0xFF).
+//
+// RFC 3629 (UTF-8) gives this grammar for valid UTF-8:
+//    UTF8-1      = %x00-7F
+//    UTF8-2      = %xC2-DF UTF8-tail
+//    UTF8-3      = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
+//                  %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
+//    UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
+//                  %xF4 %x80-8F 2( UTF8-tail )
+//    UTF8-tail   = %x80-BF
+static const uint8_t wuffs_base__utf_8__byte_length_minus_1[256] = {
+    // 0     1     2     3     4     5     6     7
+    // 8     9     A     B     C     D     E     F
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x00 ..= 0x07.
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x08 ..= 0x0F.
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x10 ..= 0x17.
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x18 ..= 0x1F.
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x20 ..= 0x27.
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x28 ..= 0x2F.
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x30 ..= 0x37.
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x38 ..= 0x3F.
+
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x40 ..= 0x47.
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x48 ..= 0x4F.
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x50 ..= 0x57.
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x58 ..= 0x5F.
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x60 ..= 0x67.
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x68 ..= 0x6F.
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x70 ..= 0x77.
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x78 ..= 0x7F.
+
+    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0x80 ..= 0x87.
+    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0x88 ..= 0x8F.
+    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0x90 ..= 0x97.
+    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0x98 ..= 0x9F.
+    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0xA0 ..= 0xA7.
+    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0xA8 ..= 0xAF.
+    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0xB0 ..= 0xB7.
+    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0xB8 ..= 0xBF.
+
+    0x80, 0x80, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0xC0 ..= 0xC7.
+    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0xC8 ..= 0xCF.
+    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0xD0 ..= 0xD7.
+    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0xD8 ..= 0xDF.
+    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,  // 0xE0 ..= 0xE7.
+    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,  // 0xE8 ..= 0xEF.
+    0x03, 0x03, 0x03, 0x03, 0x03, 0x80, 0x80, 0x80,  // 0xF0 ..= 0xF7.
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,  // 0xF8 ..= 0xFF.
+    // 0     1     2     3     4     5     6     7
+    // 8     9     A     B     C     D     E     F
+};
+
+wuffs_base__utf_8__next__output  // Decodes the code point at the start of s.
+wuffs_base__utf_8__next(wuffs_base__slice_u8 s) {
+  if (s.len == 0) {
+    return wuffs_base__make_utf_8__next__output(0, 0);  // Empty input.
+  }
+  uint32_t c = s.ptr[0];
+  switch (wuffs_base__utf_8__byte_length_minus_1[c & 0xFF]) {
+    case 0:  // 1-byte UTF-8 (ASCII).
+      return wuffs_base__make_utf_8__next__output(c, 1);
+
+    case 1:  // 2-byte UTF-8.
+      if (s.len < 2) {
+        break;  // Truncated input.
+      }
+      c = wuffs_base__load_u16le__no_bounds_check(s.ptr);
+      if ((c & 0xC000) != 0x8000) {  // 2nd byte must look like 0b10xxxxxx.
+        break;
+      }
+      c = (0x0007C0 & (c << 6)) | (0x00003F & (c >> 8));
+      return wuffs_base__make_utf_8__next__output(c, 2);
+
+    case 2:  // 3-byte UTF-8.
+      if (s.len < 3) {
+        break;  // Truncated input.
+      }
+      c = wuffs_base__load_u24le__no_bounds_check(s.ptr);
+      if ((c & 0xC0C000) != 0x808000) {  // 2nd and 3rd bytes: 0b10xxxxxx.
+        break;
+      }
+      c = (0x00F000 & (c << 12)) | (0x000FC0 & (c >> 2)) |
+          (0x00003F & (c >> 16));
+      if ((c <= 0x07FF) || ((0xD800 <= c) && (c <= 0xDFFF))) {
+        break;  // Overlong encoding or a surrogate.
+      }
+      return wuffs_base__make_utf_8__next__output(c, 3);
+
+    case 3:  // 4-byte UTF-8.
+      if (s.len < 4) {
+        break;  // Truncated input.
+      }
+      c = wuffs_base__load_u32le__no_bounds_check(s.ptr);
+      if ((c & 0xC0C0C000) != 0x80808000) {  // Bytes 2 to 4: 0b10xxxxxx.
+        break;
+      }
+      c = (0x1C0000 & (c << 18)) | (0x03F000 & (c << 4)) |
+          (0x000FC0 & (c >> 10)) | (0x00003F & (c >> 24));
+      if ((c <= 0xFFFF) || (0x110000 <= c)) {
+        break;  // Overlong encoding or above 0x10FFFF.
+      }
+      return wuffs_base__make_utf_8__next__output(c, 4);
+  }
+
+  return wuffs_base__make_utf_8__next__output(  // Invalid or truncated input.
+      WUFFS_BASE__UNICODE_REPLACEMENT_CHARACTER, 1);
+}
+
+size_t  // Returns how many leading bytes of s decode without error.
+wuffs_base__utf_8__longest_valid_prefix(wuffs_base__slice_u8 s) {
+  // TODO: possibly optimize the all-ASCII case (4 or 8 bytes at a time).
+  //
+  // TODO: possibly optimize this by manually inlining the
+  // wuffs_base__utf_8__next calls.
+  size_t original_len = s.len;
+  while (s.len > 0) {
+    wuffs_base__utf_8__next__output o = wuffs_base__utf_8__next(s);
+    if ((o.code_point > 0x7F) && (o.byte_length == 1)) {
+      break;  // 1-byte, non-ASCII result: wuffs_base__utf_8__next failed.
+    }
+    s.ptr += o.byte_length;
+    s.len -= o.byte_length;
+  }
+  return original_len - s.len;  // Bytes consumed before stopping.
+}
+
+size_t  // Returns the count of leading bytes of s that are below 0x80.
+wuffs_base__ascii__longest_valid_prefix(wuffs_base__slice_u8 s) {
+  // TODO: possibly optimize this by checking 4 or 8 bytes at a time.
+  uint8_t* original_ptr = s.ptr;
+  uint8_t* p = s.ptr;
+  uint8_t* q = s.ptr + s.len;  // One past the last byte of s.
+  for (; (p != q) && ((*p & 0x80) == 0); p++) {  // Stop at a byte >= 0x80.
+  }
+  return (size_t)(p - original_ptr);
+}
+
+#endif  // !defined(WUFFS_CONFIG__MODULES) ||
+        // defined(WUFFS_CONFIG__MODULE__BASE)
+
+#if !defined(WUFFS_CONFIG__MODULES) || defined(WUFFS_CONFIG__MODULE__BASE)
+
   // ---------------- IEEE 754 Floating Point
 
 #define WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE 1023
@@ -11472,226 +10526,1184 @@
   } while (0);
 }
 
-// ---------------- Hexadecimal
+#endif  // !defined(WUFFS_CONFIG__MODULES) ||
+        // defined(WUFFS_CONFIG__MODULE__BASE)
 
-size_t  //
-wuffs_base__hexadecimal__decode2(wuffs_base__slice_u8 dst,
-                                 wuffs_base__slice_u8 src) {
-  size_t src_len2 = src.len / 2;
-  size_t len = dst.len < src_len2 ? dst.len : src_len2;
-  uint8_t* d = dst.ptr;
-  uint8_t* s = src.ptr;
-  size_t n = len;
+#if !defined(WUFFS_CONFIG__MODULES) || defined(WUFFS_CONFIG__MODULE__BASE)
 
-  while (n--) {
-    *d = (uint8_t)((wuffs_base__parse_number__hexadecimal_digits[s[0]] << 4) |
-                   (wuffs_base__parse_number__hexadecimal_digits[s[1]] & 0x0F));
-    d += 1;
-    s += 2;
+// ---------------- Pixel Swizzler
+
+static inline uint32_t  // Returns src composited over dst; inputs and result are nonpremul.
+wuffs_base__composite_nonpremul_nonpremul_u32_axxx(uint32_t dst_nonpremul,
+                                                   uint32_t src_nonpremul) {
+  // Convert from 8-bit color to 16-bit color (0x101 * 0xAB == 0xABAB).
+  uint32_t sa = 0x101 * (0xFF & (src_nonpremul >> 24));  // Alpha is the top byte.
+  uint32_t sr = 0x101 * (0xFF & (src_nonpremul >> 16));
+  uint32_t sg = 0x101 * (0xFF & (src_nonpremul >> 8));
+  uint32_t sb = 0x101 * (0xFF & (src_nonpremul >> 0));
+  uint32_t da = 0x101 * (0xFF & (dst_nonpremul >> 24));
+  uint32_t dr = 0x101 * (0xFF & (dst_nonpremul >> 16));
+  uint32_t dg = 0x101 * (0xFF & (dst_nonpremul >> 8));
+  uint32_t db = 0x101 * (0xFF & (dst_nonpremul >> 0));
+
+  // Convert dst from nonpremul to premul (scale each color by dst alpha).
+  dr = (dr * da) / 0xFFFF;
+  dg = (dg * da) / 0xFFFF;
+  db = (db * da) / 0xFFFF;
+
+  // Calculate the inverse of the src-alpha: how much of the dst to keep.
+  uint32_t ia = 0xFFFF - sa;
+
+  // Composite src (nonpremul) over dst (premul).
+  da = sa + ((da * ia) / 0xFFFF);
+  dr = ((sr * sa) + (dr * ia)) / 0xFFFF;  // sa + ia == 0xFFFF, so the sum fits in 32 bits.
+  dg = ((sg * sa) + (dg * ia)) / 0xFFFF;
+  db = ((sb * sa) + (db * ia)) / 0xFFFF;
+
+  // Convert dst from premul to nonpremul.
+  if (da != 0) {  // Skip the division when the result is fully transparent.
+    dr = (dr * 0xFFFF) / da;
+    dg = (dg * 0xFFFF) / da;
+    db = (db * 0xFFFF) / da;
   }
 
-  return len;
+  // Convert from 16-bit color to 8-bit color and combine the components.
+  da >>= 8;
+  dr >>= 8;
+  dg >>= 8;
+  db >>= 8;
+  return (db << 0) | (dg << 8) | (dr << 16) | (da << 24);  // Same byte layout as the inputs.
 }
 
-size_t  //
-wuffs_base__hexadecimal__decode4(wuffs_base__slice_u8 dst,
-                                 wuffs_base__slice_u8 src) {
-  size_t src_len4 = src.len / 4;
-  size_t len = dst.len < src_len4 ? dst.len : src_len4;
+static inline uint32_t  // Returns premul src composited over nonpremul dst, as nonpremul.
+wuffs_base__composite_nonpremul_premul_u32_axxx(uint32_t dst_nonpremul,
+                                                uint32_t src_premul) {
+  // Convert from 8-bit color to 16-bit color (0x101 * 0xAB == 0xABAB).
+  uint32_t sa = 0x101 * (0xFF & (src_premul >> 24));  // Alpha is the top byte.
+  uint32_t sr = 0x101 * (0xFF & (src_premul >> 16));
+  uint32_t sg = 0x101 * (0xFF & (src_premul >> 8));
+  uint32_t sb = 0x101 * (0xFF & (src_premul >> 0));
+  uint32_t da = 0x101 * (0xFF & (dst_nonpremul >> 24));
+  uint32_t dr = 0x101 * (0xFF & (dst_nonpremul >> 16));
+  uint32_t dg = 0x101 * (0xFF & (dst_nonpremul >> 8));
+  uint32_t db = 0x101 * (0xFF & (dst_nonpremul >> 0));
+
+  // Convert dst from nonpremul to premul (scale each color by dst alpha).
+  dr = (dr * da) / 0xFFFF;
+  dg = (dg * da) / 0xFFFF;
+  db = (db * da) / 0xFFFF;
+
+  // Calculate the inverse of the src-alpha: how much of the dst to keep.
+  uint32_t ia = 0xFFFF - sa;
+
+  // Composite src (premul) over dst (premul).
+  da = sa + ((da * ia) / 0xFFFF);
+  dr = sr + ((dr * ia) / 0xFFFF);  // src is already alpha-scaled, so it is added as-is.
+  dg = sg + ((dg * ia) / 0xFFFF);
+  db = sb + ((db * ia) / 0xFFFF);
+
+  // Convert dst from premul to nonpremul.
+  if (da != 0) {  // Skip the division when the result is fully transparent.
+    dr = (dr * 0xFFFF) / da;
+    dg = (dg * 0xFFFF) / da;
+    db = (db * 0xFFFF) / da;
+  }
+
+  // Convert from 16-bit color to 8-bit color and combine the components.
+  da >>= 8;
+  dr >>= 8;
+  dg >>= 8;
+  db >>= 8;
+  return (db << 0) | (dg << 8) | (dr << 16) | (da << 24);  // Same byte layout as the inputs.
+}
+
+static inline uint32_t  // Returns nonpremul src composited over premul dst, as premul.
+wuffs_base__composite_premul_nonpremul_u32_axxx(uint32_t dst_premul,
+                                                uint32_t src_nonpremul) {
+  // Convert from 8-bit color to 16-bit color (0x101 * 0xAB == 0xABAB).
+  uint32_t sa = 0x101 * (0xFF & (src_nonpremul >> 24));  // Alpha is the top byte.
+  uint32_t sr = 0x101 * (0xFF & (src_nonpremul >> 16));
+  uint32_t sg = 0x101 * (0xFF & (src_nonpremul >> 8));
+  uint32_t sb = 0x101 * (0xFF & (src_nonpremul >> 0));
+  uint32_t da = 0x101 * (0xFF & (dst_premul >> 24));
+  uint32_t dr = 0x101 * (0xFF & (dst_premul >> 16));
+  uint32_t dg = 0x101 * (0xFF & (dst_premul >> 8));
+  uint32_t db = 0x101 * (0xFF & (dst_premul >> 0));
+
+  // Calculate the inverse of the src-alpha: how much of the dst to keep.
+  uint32_t ia = 0xFFFF - sa;
+
+  // Composite src (nonpremul) over dst (premul).
+  da = sa + ((da * ia) / 0xFFFF);
+  dr = ((sr * sa) + (dr * ia)) / 0xFFFF;  // sr * sa premultiplies the src color on the fly.
+  dg = ((sg * sa) + (dg * ia)) / 0xFFFF;
+  db = ((sb * sa) + (db * ia)) / 0xFFFF;
+
+  // Convert from 16-bit color to 8-bit color and combine the components.
+  da >>= 8;
+  dr >>= 8;
+  dg >>= 8;
+  db >>= 8;
+  return (db << 0) | (dg << 8) | (dr << 16) | (da << 24);  // Same byte layout as the inputs.
+}
+
+static inline uint32_t  // Returns premul src composited over premul dst, as premul.
+wuffs_base__composite_premul_premul_u32_axxx(uint32_t dst_premul,
+                                             uint32_t src_premul) {
+  // Convert from 8-bit color to 16-bit color (0x101 * 0xAB == 0xABAB).
+  uint32_t sa = 0x101 * (0xFF & (src_premul >> 24));  // Alpha is the top byte.
+  uint32_t sr = 0x101 * (0xFF & (src_premul >> 16));
+  uint32_t sg = 0x101 * (0xFF & (src_premul >> 8));
+  uint32_t sb = 0x101 * (0xFF & (src_premul >> 0));
+  uint32_t da = 0x101 * (0xFF & (dst_premul >> 24));
+  uint32_t dr = 0x101 * (0xFF & (dst_premul >> 16));
+  uint32_t dg = 0x101 * (0xFF & (dst_premul >> 8));
+  uint32_t db = 0x101 * (0xFF & (dst_premul >> 0));
+
+  // Calculate the inverse of the src-alpha: how much of the dst to keep.
+  uint32_t ia = 0xFFFF - sa;
+
+  // Composite src (premul) over dst (premul).
+  da = sa + ((da * ia) / 0xFFFF);
+  dr = sr + ((dr * ia) / 0xFFFF);  // All four channels use the same formula.
+  dg = sg + ((dg * ia) / 0xFFFF);
+  db = sb + ((db * ia) / 0xFFFF);
+
+  // Convert from 16-bit color to 8-bit color and combine the components.
+  da >>= 8;
+  dr >>= 8;
+  dg >>= 8;
+  db >>= 8;
+  return (db << 0) | (dg << 8) | (dr << 16) | (da << 24);  // Same byte layout as the inputs.
+}
+
+// --------
+
+static uint64_t  // Returns the number of bytes (a multiple of 4) processed.
+wuffs_base__pixel_swizzler__squash_bgr_565_888(wuffs_base__slice_u8 dst,
+                                               wuffs_base__slice_u8 src) {
+  size_t len4 = (dst.len < src.len ? dst.len : src.len) / 4;  // Whole 4-byte units that fit in both slices.
   uint8_t* d = dst.ptr;
   uint8_t* s = src.ptr;
-  size_t n = len;
 
+  size_t n = len4;
   while (n--) {
-    *d = (uint8_t)((wuffs_base__parse_number__hexadecimal_digits[s[2]] << 4) |
-                   (wuffs_base__parse_number__hexadecimal_digits[s[3]] & 0x0F));
-    d += 1;
+    uint32_t argb = wuffs_base__load_u32le__no_bounds_check(s);
+    uint32_t b5 = 0x1F & (argb >> (8 - 5));  // Top 5 bits of bits 0..7.
+    uint32_t g6 = 0x3F & (argb >> (16 - 6));  // Top 6 bits of bits 8..15.
+    uint32_t r5 = 0x1F & (argb >> (24 - 5));  // Top 5 bits of bits 16..23.
+    uint32_t alpha = argb & 0xFF000000;  // Bits 24..31 are kept in place.
+    wuffs_base__store_u32le__no_bounds_check(
+        d, alpha | (r5 << 11) | (g6 << 5) | (b5 << 0));  // 5/6/5 packed into the low 16 bits; bits 16..23 are zero.
     s += 4;
+    d += 4;
+  }
+  return len4 * 4;
+}
+
+static uint64_t  // Returns the number of bytes (a multiple of 4) processed.
+wuffs_base__pixel_swizzler__swap_rgbx_bgrx(wuffs_base__slice_u8 dst,
+                                           wuffs_base__slice_u8 src) {
+  size_t len4 = (dst.len < src.len ? dst.len : src.len) / 4;  // Whole 4-byte units that fit in both slices.
+  uint8_t* d = dst.ptr;
+  uint8_t* s = src.ptr;
+
+  size_t n = len4;
+  while (n--) {
+    uint8_t b0 = s[0];  // Read all four bytes before writing any of them.
+    uint8_t b1 = s[1];
+    uint8_t b2 = s[2];
+    uint8_t b3 = s[3];
+    d[0] = b2;  // Bytes 0 and 2 trade places; bytes 1 and 3 stay put.
+    d[1] = b1;
+    d[2] = b0;
+    d[3] = b3;
+    s += 4;
+    d += 4;
+  }
+  return len4 * 4;
+}
+
+// --------
+
+static uint64_t  // Returns whatever wuffs_base__slice_u8__copy_from_slice returns.
+wuffs_base__pixel_swizzler__copy_1_1(wuffs_base__slice_u8 dst,
+                                     wuffs_base__slice_u8 dst_palette,
+                                     wuffs_base__slice_u8 src) {
+  return wuffs_base__slice_u8__copy_from_slice(dst, src);  // dst_palette is unused here.
+}
+
+static uint64_t  // Returns the number of 3-byte units copied (not bytes).
+wuffs_base__pixel_swizzler__copy_3_3(wuffs_base__slice_u8 dst,
+                                     wuffs_base__slice_u8 dst_palette,
+                                     wuffs_base__slice_u8 src) {
+  size_t dst_len3 = dst.len / 3;
+  size_t src_len3 = src.len / 3;
+  size_t len = dst_len3 < src_len3 ? dst_len3 : src_len3;  // Whole units that fit in both slices.
+  if (len > 0) {
+    memmove(dst.ptr, src.ptr, len * 3);  // memmove, so overlapping dst and src are fine.
+  }
+  return len;  // dst_palette is unused here.
+}
+
+static uint64_t  // Returns the number of 4-byte units copied (not bytes).
+wuffs_base__pixel_swizzler__copy_4_4(wuffs_base__slice_u8 dst,
+                                     wuffs_base__slice_u8 dst_palette,
+                                     wuffs_base__slice_u8 src) {
+  size_t dst_len4 = dst.len / 4;
+  size_t src_len4 = src.len / 4;
+  size_t len = dst_len4 < src_len4 ? dst_len4 : src_len4;  // Whole units that fit in both slices.
+  if (len > 0) {
+    memmove(dst.ptr, src.ptr, len * 4);  // memmove, so overlapping dst and src are fine.
+  }
+  return len;  // dst_palette is unused here.
+}
+
+// --------
+
+static uint64_t  // Returns the number of pixels converted (3 src bytes -> 2 dst bytes each).
+wuffs_base__pixel_swizzler__bgr_565__bgr(wuffs_base__slice_u8 dst,
+                                         wuffs_base__slice_u8 dst_palette,
+                                         wuffs_base__slice_u8 src) {
+  size_t dst_len2 = dst.len / 2;
+  size_t src_len3 = src.len / 3;
+  size_t len = dst_len2 < src_len3 ? dst_len2 : src_len3;  // Whole pixels that fit in both slices.
+  uint8_t* d = dst.ptr;
+  uint8_t* s = src.ptr;
+  size_t n = len;
+
+  // TODO: unroll.
+
+  while (n >= 1) {
+    uint32_t b5 = s[0] >> 3;  // Keep the top 5 bits of byte 0.
+    uint32_t g6 = s[1] >> 2;  // Keep the top 6 bits of byte 1.
+    uint32_t r5 = s[2] >> 3;  // Keep the top 5 bits of byte 2.
+    uint32_t rgb_565 = (r5 << 11) | (g6 << 5) | (b5 << 0);
+    wuffs_base__store_u16le__no_bounds_check(d + (0 * 2), (uint16_t)rgb_565);
+
+    s += 1 * 3;
+    d += 1 * 2;
+    n -= 1;
   }
 
   return len;
 }
 
-// ---------------- Unicode and UTF-8
+static uint64_t  // Returns the number of pixels converted (4 src bytes -> 2 dst bytes each).
+wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src(
+    wuffs_base__slice_u8 dst,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src) {
+  size_t dst_len2 = dst.len / 2;
+  size_t src_len4 = src.len / 4;
+  size_t len = dst_len2 < src_len4 ? dst_len2 : src_len4;  // Whole pixels that fit in both slices.
+  uint8_t* d = dst.ptr;
+  uint8_t* s = src.ptr;
+  size_t n = len;
 
-size_t  //
-wuffs_base__utf_8__encode(wuffs_base__slice_u8 dst, uint32_t code_point) {
-  if (code_point <= 0x7F) {
-    if (dst.len >= 1) {
-      dst.ptr[0] = (uint8_t)(code_point);
-      return 1;
-    }
+  // TODO: unroll.
 
-  } else if (code_point <= 0x07FF) {
-    if (dst.len >= 2) {
-      dst.ptr[0] = (uint8_t)(0xC0 | ((code_point >> 6)));
-      dst.ptr[1] = (uint8_t)(0x80 | ((code_point >> 0) & 0x3F));
-      return 2;
-    }
+  while (n >= 1) {
+    wuffs_base__store_u16le__no_bounds_check(
+        d + (0 * 2),
+        wuffs_base__color_u32_argb_premul__as__color_u16_rgb_565(
+            wuffs_base__color_u32_argb_nonpremul__as__color_u32_argb_premul(
+                wuffs_base__load_u32le__no_bounds_check(s + (0 * 4)))));  // Load 32 bits, then two helper conversions (defined elsewhere), then a 16-bit store.
 
-  } else if (code_point <= 0xFFFF) {
-    if ((dst.len >= 3) && ((code_point < 0xD800) || (0xDFFF < code_point))) {
-      dst.ptr[0] = (uint8_t)(0xE0 | ((code_point >> 12)));
-      dst.ptr[1] = (uint8_t)(0x80 | ((code_point >> 6) & 0x3F));
-      dst.ptr[2] = (uint8_t)(0x80 | ((code_point >> 0) & 0x3F));
-      return 3;
-    }
-
-  } else if (code_point <= 0x10FFFF) {
-    if (dst.len >= 4) {
-      dst.ptr[0] = (uint8_t)(0xF0 | ((code_point >> 18)));
-      dst.ptr[1] = (uint8_t)(0x80 | ((code_point >> 12) & 0x3F));
-      dst.ptr[2] = (uint8_t)(0x80 | ((code_point >> 6) & 0x3F));
-      dst.ptr[3] = (uint8_t)(0x80 | ((code_point >> 0) & 0x3F));
-      return 4;
-    }
+    s += 1 * 4;
+    d += 1 * 2;
+    n -= 1;
   }
 
-  return 0;
+  return len;
 }
 
-// wuffs_base__utf_8__byte_length_minus_1 is the byte length (minus 1) of a
-// UTF-8 encoded code point, based on the encoding's initial byte.
-//  - 0x00 is 1-byte UTF-8 (ASCII).
-//  - 0x01 is the start of 2-byte UTF-8.
-//  - 0x02 is the start of 3-byte UTF-8.
-//  - 0x03 is the start of 4-byte UTF-8.
-//  - 0x40 is a UTF-8 tail byte.
-//  - 0x80 is invalid UTF-8.
-//
-// RFC 3629 (UTF-8) gives this grammar for valid UTF-8:
-//    UTF8-1      = %x00-7F
-//    UTF8-2      = %xC2-DF UTF8-tail
-//    UTF8-3      = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
-//                  %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
-//    UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
-//                  %xF4 %x80-8F 2( UTF8-tail )
-//    UTF8-tail   = %x80-BF
-static const uint8_t wuffs_base__utf_8__byte_length_minus_1[256] = {
-    // 0     1     2     3     4     5     6     7
-    // 8     9     A     B     C     D     E     F
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x00 ..= 0x07.
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x08 ..= 0x0F.
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x10 ..= 0x17.
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x18 ..= 0x1F.
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x20 ..= 0x27.
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x28 ..= 0x2F.
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x30 ..= 0x37.
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x38 ..= 0x3F.
+static uint64_t  // Returns the number of pixels blended (4 src bytes over 2 dst bytes each).
+wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src_over(
+    wuffs_base__slice_u8 dst,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src) {
+  size_t dst_len2 = dst.len / 2;
+  size_t src_len4 = src.len / 4;
+  size_t len = dst_len2 < src_len4 ? dst_len2 : src_len4;  // Whole pixels that fit in both slices.
+  uint8_t* d = dst.ptr;
+  uint8_t* s = src.ptr;
+  size_t n = len;
 
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x40 ..= 0x47.
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x48 ..= 0x4F.
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x50 ..= 0x57.
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x58 ..= 0x5F.
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x60 ..= 0x67.
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x68 ..= 0x6F.
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x70 ..= 0x77.
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x78 ..= 0x7F.
+  // TODO: unroll.
 
-    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0x80 ..= 0x87.
-    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0x88 ..= 0x8F.
-    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0x90 ..= 0x97.
-    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0x98 ..= 0x9F.
-    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0xA0 ..= 0xA7.
-    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0xA8 ..= 0xAF.
-    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0xB0 ..= 0xB7.
-    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0xB8 ..= 0xBF.
+  while (n >= 1) {
+    // Convert from 8-bit color to 16-bit color (0x101 * 0xAB == 0xABAB).
+    uint32_t sa = 0x101 * ((uint32_t)s[3]);
+    uint32_t sr = 0x101 * ((uint32_t)s[2]);
+    uint32_t sg = 0x101 * ((uint32_t)s[1]);
+    uint32_t sb = 0x101 * ((uint32_t)s[0]);
 
-    0x80, 0x80, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0xC0 ..= 0xC7.
-    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0xC8 ..= 0xCF.
-    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0xD0 ..= 0xD7.
-    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0xD8 ..= 0xDF.
-    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,  // 0xE0 ..= 0xE7.
-    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,  // 0xE8 ..= 0xEF.
-    0x03, 0x03, 0x03, 0x03, 0x03, 0x80, 0x80, 0x80,  // 0xF0 ..= 0xF7.
-    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,  // 0xF8 ..= 0xFF.
-    // 0     1     2     3     4     5     6     7
-    // 8     9     A     B     C     D     E     F
-};
+    // Convert from 565 color to 16-bit color by replicating the bit pattern.
+    uint32_t old_rgb_565 = wuffs_base__load_u16le__no_bounds_check(d + (0 * 2));
+    uint32_t old_r5 = 0x1F & (old_rgb_565 >> 11);
+    uint32_t dr = (0x8421 * old_r5) >> 4;  // 0x8421 copies the 5 bits to offsets 0, 5, 10, 15 (20 bits); >> 4 keeps the top 16.
+    uint32_t old_g6 = 0x3F & (old_rgb_565 >> 5);
+    uint32_t dg = (0x1041 * old_g6) >> 2;  // 0x1041 copies the 6 bits to offsets 0, 6, 12 (18 bits); >> 2 keeps the top 16.
+    uint32_t old_b5 = 0x1F & (old_rgb_565 >> 0);
+    uint32_t db = (0x8421 * old_b5) >> 4;
 
-wuffs_base__utf_8__next__output  //
-wuffs_base__utf_8__next(wuffs_base__slice_u8 s) {
-  if (s.len == 0) {
-    return wuffs_base__make_utf_8__next__output(0, 0);
-  }
-  uint32_t c = s.ptr[0];
-  switch (wuffs_base__utf_8__byte_length_minus_1[c & 0xFF]) {
-    case 0:
-      return wuffs_base__make_utf_8__next__output(c, 1);
+    // Calculate the inverse of the src-alpha: how much of the dst to keep.
+    uint32_t ia = 0xFFFF - sa;
 
-    case 1:
-      if (s.len < 2) {
-        break;
-      }
-      c = wuffs_base__load_u16le__no_bounds_check(s.ptr);
-      if ((c & 0xC000) != 0x8000) {
-        break;
-      }
-      c = (0x0007C0 & (c << 6)) | (0x00003F & (c >> 8));
-      return wuffs_base__make_utf_8__next__output(c, 2);
+    // Composite src (nonpremul) over dst (premul). The 565 dst has no alpha channel.
+    dr = ((sr * sa) + (dr * ia)) / 0xFFFF;
+    dg = ((sg * sa) + (dg * ia)) / 0xFFFF;
+    db = ((sb * sa) + (db * ia)) / 0xFFFF;
 
-    case 2:
-      if (s.len < 3) {
-        break;
-      }
-      c = wuffs_base__load_u24le__no_bounds_check(s.ptr);
-      if ((c & 0xC0C000) != 0x808000) {
-        break;
-      }
-      c = (0x00F000 & (c << 12)) | (0x000FC0 & (c >> 2)) |
-          (0x00003F & (c >> 16));
-      if ((c <= 0x07FF) || ((0xD800 <= c) && (c <= 0xDFFF))) {
-        break;
-      }
-      return wuffs_base__make_utf_8__next__output(c, 3);
+    // Convert from 16-bit color to 565 color and combine the components.
+    uint32_t new_r5 = 0x1F & (dr >> 11);  // Top 5 of 16 bits.
+    uint32_t new_g6 = 0x3F & (dg >> 10);  // Top 6 of 16 bits.
+    uint32_t new_b5 = 0x1F & (db >> 11);
+    uint32_t new_rgb_565 = (new_r5 << 11) | (new_g6 << 5) | (new_b5 << 0);
+    wuffs_base__store_u16le__no_bounds_check(d + (0 * 2),
+                                             (uint16_t)new_rgb_565);
 
-    case 3:
-      if (s.len < 4) {
-        break;
-      }
-      c = wuffs_base__load_u32le__no_bounds_check(s.ptr);
-      if ((c & 0xC0C0C000) != 0x80808000) {
-        break;
-      }
-      c = (0x1C0000 & (c << 18)) | (0x03F000 & (c << 4)) |
-          (0x000FC0 & (c >> 10)) | (0x00003F & (c >> 24));
-      if ((c <= 0xFFFF) || (0x110000 <= c)) {
-        break;
-      }
-      return wuffs_base__make_utf_8__next__output(c, 4);
+    s += 1 * 4;
+    d += 1 * 2;
+    n -= 1;
   }
 
-  return wuffs_base__make_utf_8__next__output(
-      WUFFS_BASE__UNICODE_REPLACEMENT_CHARACTER, 1);
+  return len;
 }
 
-size_t  //
-wuffs_base__utf_8__longest_valid_prefix(wuffs_base__slice_u8 s) {
-  // TODO: possibly optimize the all-ASCII case (4 or 8 bytes at a time).
+static uint64_t  // Returns the number of pixels converted (1 src byte -> 2 dst bytes each).
+wuffs_base__pixel_swizzler__bgr_565__y(wuffs_base__slice_u8 dst,
+                                       wuffs_base__slice_u8 dst_palette,
+                                       wuffs_base__slice_u8 src) {
+  size_t dst_len2 = dst.len / 2;
+  size_t len = dst_len2 < src.len ? dst_len2 : src.len;  // Whole pixels that fit in both slices.
+  uint8_t* d = dst.ptr;
+  uint8_t* s = src.ptr;
+  size_t n = len;
+
+  // TODO: unroll.
+
+  while (n >= 1) {
+    uint32_t y5 = s[0] >> 3;  // Top 5 bits of the source byte.
+    uint32_t y6 = s[0] >> 2;  // Top 6 bits of the source byte.
+    uint32_t rgb_565 = (y5 << 11) | (y6 << 5) | (y5 << 0);  // The same value feeds all three 565 fields.
+    wuffs_base__store_u16le__no_bounds_check(d + (0 * 2), (uint16_t)rgb_565);
+
+    s += 1 * 1;
+    d += 1 * 2;
+    n -= 1;
+  }
+
+  return len;
+}
+
+static uint64_t  // Returns the number of pixels written, or 0 if dst_palette is not 1024 bytes.
+wuffs_base__pixel_swizzler__bgr_565__index__src(
+    wuffs_base__slice_u8 dst,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src) {
+  if (dst_palette.len != 1024) {  // 256 entries * 4 bytes, so every uint8_t index below is in bounds.
+    return 0;
+  }
+  size_t dst_len2 = dst.len / 2;
+  size_t len = dst_len2 < src.len ? dst_len2 : src.len;  // Whole pixels that fit in both slices.
+  uint8_t* d = dst.ptr;
+  uint8_t* s = src.ptr;
+  size_t n = len;
+
+  const size_t loop_unroll_count = 4;
+
+  while (n >= loop_unroll_count) {  // Four pixels per iteration.
+    wuffs_base__store_u16le__no_bounds_check(
+        d + (0 * 2), wuffs_base__load_u16le__no_bounds_check(
+                         dst_palette.ptr + ((size_t)s[0] * 4)));  // Only the first 2 bytes of the 4-byte entry are used.
+    wuffs_base__store_u16le__no_bounds_check(
+        d + (1 * 2), wuffs_base__load_u16le__no_bounds_check(
+                         dst_palette.ptr + ((size_t)s[1] * 4)));
+    wuffs_base__store_u16le__no_bounds_check(
+        d + (2 * 2), wuffs_base__load_u16le__no_bounds_check(
+                         dst_palette.ptr + ((size_t)s[2] * 4)));
+    wuffs_base__store_u16le__no_bounds_check(
+        d + (3 * 2), wuffs_base__load_u16le__no_bounds_check(
+                         dst_palette.ptr + ((size_t)s[3] * 4)));
+
+    s += loop_unroll_count * 1;
+    d += loop_unroll_count * 2;
+    n -= loop_unroll_count;
+  }
+
+  while (n >= 1) {  // Remaining 0..3 pixels, one at a time.
+    wuffs_base__store_u16le__no_bounds_check(
+        d + (0 * 2), wuffs_base__load_u16le__no_bounds_check(
+                         dst_palette.ptr + ((size_t)s[0] * 4)));
+
+    s += 1 * 1;
+    d += 1 * 2;
+    n -= 1;
+  }
+
+  return len;
+}
+
+static uint64_t  // Returns the number of pixels visited, or 0 if dst_palette is not 1024 bytes.
+wuffs_base__pixel_swizzler__bgr_565__index_binary_alpha__src_over(
+    wuffs_base__slice_u8 dst,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src) {
+  if (dst_palette.len != 1024) {  // 256 entries * 4 bytes, so every uint8_t index below is in bounds.
+    return 0;
+  }
+  size_t dst_len2 = dst.len / 2;
+  size_t len = dst_len2 < src.len ? dst_len2 : src.len;  // Whole pixels that fit in both slices.
+  uint8_t* d = dst.ptr;
+  uint8_t* s = src.ptr;
+  size_t n = len;
+
+  // TODO: unroll.
+
+  while (n >= 1) {
+    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
+                                                          ((size_t)s[0] * 4));
+    if (s0) {  // An all-zero palette entry leaves the existing dst pixel untouched.
+      wuffs_base__store_u16le__no_bounds_check(d + (0 * 2), (uint16_t)s0);  // Keep only the low 16 bits of the entry.
+    }
+
+    s += 1 * 1;
+    d += 1 * 2;
+    n -= 1;
+  }
+
+  return len;  // Counts skipped pixels too.
+}
+
+// --------
+
+static uint64_t  // Returns the number of pixels converted (4 src bytes -> 3 dst bytes each).
+wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src(
+    wuffs_base__slice_u8 dst,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src) {
+  size_t dst_len3 = dst.len / 3;
+  size_t src_len4 = src.len / 4;
+  size_t len = dst_len3 < src_len4 ? dst_len3 : src_len4;  // Whole pixels that fit in both slices.
+  uint8_t* d = dst.ptr;
+  uint8_t* s = src.ptr;
+  size_t n = len;
+
+  // TODO: unroll.
+
+  while (n >= 1) {
+    uint32_t s0 =
+        wuffs_base__color_u32_argb_nonpremul__as__color_u32_argb_premul(
+            wuffs_base__load_u32le__no_bounds_check(s + (0 * 4)));  // Helper defined elsewhere; per its name, nonpremul -> premul.
+    wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);  // u24 store into a 3-byte dst pixel.
+
+    s += 1 * 4;
+    d += 1 * 3;
+    n -= 1;
+  }
+
+  return len;
+}
+
+static uint64_t  // Returns the number of pixels blended (4 src bytes over 3 dst bytes each).
+wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src_over(
+    wuffs_base__slice_u8 dst,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src) {
+  size_t dst_len3 = dst.len / 3;
+  size_t src_len4 = src.len / 4;
+  size_t len = dst_len3 < src_len4 ? dst_len3 : src_len4;  // Whole pixels that fit in both slices.
+  uint8_t* d = dst.ptr;
+  uint8_t* s = src.ptr;
+  size_t n = len;
+
+  // TODO: unroll.
+
+  while (n >= 1) {
+    // Convert from 8-bit color to 16-bit color (0x101 * 0xAB == 0xABAB).
+    uint32_t sa = 0x101 * ((uint32_t)s[3]);
+    uint32_t sr = 0x101 * ((uint32_t)s[2]);
+    uint32_t sg = 0x101 * ((uint32_t)s[1]);
+    uint32_t sb = 0x101 * ((uint32_t)s[0]);
+    uint32_t dr = 0x101 * ((uint32_t)d[2]);  // dst has three channels and no alpha byte.
+    uint32_t dg = 0x101 * ((uint32_t)d[1]);
+    uint32_t db = 0x101 * ((uint32_t)d[0]);
+
+    // Calculate the inverse of the src-alpha: how much of the dst to keep.
+    uint32_t ia = 0xFFFF - sa;
+
+    // Composite src (nonpremul) over dst (premul).
+    dr = ((sr * sa) + (dr * ia)) / 0xFFFF;  // sa + ia == 0xFFFF, so the sum fits in 32 bits.
+    dg = ((sg * sa) + (dg * ia)) / 0xFFFF;
+    db = ((sb * sa) + (db * ia)) / 0xFFFF;
+
+    // Convert from 16-bit color to 8-bit color (keep the high byte).
+    d[0] = (uint8_t)(db >> 8);
+    d[1] = (uint8_t)(dg >> 8);
+    d[2] = (uint8_t)(dr >> 8);
+
+    s += 1 * 4;
+    d += 1 * 3;
+    n -= 1;
+  }
+
+  return len;
+}
+
+// --------
+
+static uint64_t  // Returns the number of 4-byte pixels blended.
+wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul__src_over(
+    wuffs_base__slice_u8 dst,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src) {
+  size_t dst_len4 = dst.len / 4;
+  size_t src_len4 = src.len / 4;
+  size_t len = dst_len4 < src_len4 ? dst_len4 : src_len4;  // Whole pixels that fit in both slices.
+  uint8_t* d = dst.ptr;
+  uint8_t* s = src.ptr;
+  size_t n = len;
+
+  // TODO: unroll.
+
+  while (n >= 1) {
+    uint32_t d0 = wuffs_base__load_u32le__no_bounds_check(d + (0 * 4));  // Current dst pixel.
+    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(s + (0 * 4));  // Incoming src pixel.
+    wuffs_base__store_u32le__no_bounds_check(
+        d + (0 * 4),
+        wuffs_base__composite_nonpremul_nonpremul_u32_axxx(d0, s0));  // Blend and write back in place.
+
+    s += 1 * 4;
+    d += 1 * 4;
+    n -= 1;
+  }
+
+  return len;
+}
+
+// --------
+
+static uint64_t  // Returns the number of 4-byte pixels converted.
+wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src(
+    wuffs_base__slice_u8 dst,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src) {
+  size_t dst_len4 = dst.len / 4;
+  size_t src_len4 = src.len / 4;
+  size_t len = dst_len4 < src_len4 ? dst_len4 : src_len4;  // Whole pixels that fit in both slices.
+  uint8_t* d = dst.ptr;
+  uint8_t* s = src.ptr;
+  size_t n = len;
+
+  // TODO: unroll.
+
+  while (n >= 1) {
+    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(s + (0 * 4));
+    wuffs_base__store_u32le__no_bounds_check(
+        d + (0 * 4),
+        wuffs_base__color_u32_argb_nonpremul__as__color_u32_argb_premul(s0));  // Helper defined elsewhere; dst's old value is not read.
+
+    s += 1 * 4;
+    d += 1 * 4;
+    n -= 1;
+  }
+
+  return len;
+}
+
+static uint64_t  // Returns the number of 4-byte pixels blended.
+wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src_over(
+    wuffs_base__slice_u8 dst,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src) {
+  size_t dst_len4 = dst.len / 4;
+  size_t src_len4 = src.len / 4;
+  size_t len = dst_len4 < src_len4 ? dst_len4 : src_len4;  // Whole pixels that fit in both slices.
+  uint8_t* d = dst.ptr;
+  uint8_t* s = src.ptr;
+  size_t n = len;
+
+  // TODO: unroll.
+
+  while (n >= 1) {
+    uint32_t d0 = wuffs_base__load_u32le__no_bounds_check(d + (0 * 4));  // Current dst pixel.
+    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(s + (0 * 4));  // Incoming src pixel.
+    wuffs_base__store_u32le__no_bounds_check(
+        d + (0 * 4), wuffs_base__composite_premul_nonpremul_u32_axxx(d0, s0));  // Blend and write back in place.
+
+    s += 1 * 4;
+    d += 1 * 4;
+    n -= 1;
+  }
+
+  return len;
+}
+
+// --------
+
+static uint64_t  // Returns the number of 3-byte pixels written, or 0 if dst_palette is not 1024 bytes.
+wuffs_base__pixel_swizzler__xxx__index__src(wuffs_base__slice_u8 dst,
+                                            wuffs_base__slice_u8 dst_palette,
+                                            wuffs_base__slice_u8 src) {
+  if (dst_palette.len != 1024) {  // 256 entries * 4 bytes, so every uint8_t index below is in bounds.
+    return 0;
+  }
+  size_t dst_len3 = dst.len / 3;
+  size_t len = dst_len3 < src.len ? dst_len3 : src.len;  // Whole pixels that fit in both slices.
+  uint8_t* d = dst.ptr;
+  uint8_t* s = src.ptr;
+  size_t n = len;
+
+  const size_t loop_unroll_count = 4;
+
+  // The comparison in the while condition is ">", not ">=", because with
+  // ">=", the last 4-byte store could write past the end of the dst slice.
   //
-  // TODO: possibly optimize this by manually inlining the
-  // wuffs_base__utf_8__next calls.
-  size_t original_len = s.len;
-  while (s.len > 0) {
-    wuffs_base__utf_8__next__output o = wuffs_base__utf_8__next(s);
-    if ((o.code_point > 0x7F) && (o.byte_length == 1)) {
-      break;
-    }
-    s.ptr += o.byte_length;
-    s.len -= o.byte_length;
+  // Each 4-byte store writes one too many bytes, but a subsequent store
+  // will overwrite that with the correct byte. There is always another
+  // store, whether a 4-byte store in this loop or a u24 (one pixel) store
+  // in the next loop.
+  while (n > loop_unroll_count) {
+    wuffs_base__store_u32le__no_bounds_check(
+        d + (0 * 3), wuffs_base__load_u32le__no_bounds_check(
+                         dst_palette.ptr + ((size_t)s[0] * 4)));  // 4 bytes written at a 3-byte stride.
+    wuffs_base__store_u32le__no_bounds_check(
+        d + (1 * 3), wuffs_base__load_u32le__no_bounds_check(
+                         dst_palette.ptr + ((size_t)s[1] * 4)));
+    wuffs_base__store_u32le__no_bounds_check(
+        d + (2 * 3), wuffs_base__load_u32le__no_bounds_check(
+                         dst_palette.ptr + ((size_t)s[2] * 4)));
+    wuffs_base__store_u32le__no_bounds_check(
+        d + (3 * 3), wuffs_base__load_u32le__no_bounds_check(
+                         dst_palette.ptr + ((size_t)s[3] * 4)));
+
+    s += loop_unroll_count * 1;
+    d += loop_unroll_count * 3;
+    n -= loop_unroll_count;
   }
-  return original_len - s.len;
+
+  while (n >= 1) {  // Runs at least once whenever the loop above ran, because that loop exits with n >= 1.
+    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
+                                                          ((size_t)s[0] * 4));
+    wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);
+
+    s += 1 * 1;
+    d += 1 * 3;
+    n -= 1;
+  }
+
+  return len;
 }
 
-size_t  //
-wuffs_base__ascii__longest_valid_prefix(wuffs_base__slice_u8 s) {
-  // TODO: possibly optimize this by checking 4 or 8 bytes at a time.
-  uint8_t* original_ptr = s.ptr;
-  uint8_t* p = s.ptr;
-  uint8_t* q = s.ptr + s.len;
-  for (; (p != q) && ((*p & 0x80) == 0); p++) {
+static uint64_t  // Returns the number of 3-byte pixels visited, or 0 if dst_palette is not 1024 bytes.
+wuffs_base__pixel_swizzler__xxx__index_binary_alpha__src_over(
+    wuffs_base__slice_u8 dst,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src) {
+  if (dst_palette.len != 1024) {  // 256 entries * 4 bytes, so every uint8_t index below is in bounds.
+    return 0;
   }
-  return (size_t)(p - original_ptr);
+  size_t dst_len3 = dst.len / 3;
+  size_t len = dst_len3 < src.len ? dst_len3 : src.len;  // Whole pixels that fit in both slices.
+  uint8_t* d = dst.ptr;
+  uint8_t* s = src.ptr;
+  size_t n = len;
+
+  const size_t loop_unroll_count = 4;
+
+  while (n >= loop_unroll_count) {  // ">=" is used here: each store below is a u24 store, not a 4-byte one.
+    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
+                                                          ((size_t)s[0] * 4));
+    if (s0) {  // An all-zero palette entry leaves the existing dst pixel untouched.
+      wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);
+    }
+    uint32_t s1 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
+                                                          ((size_t)s[1] * 4));
+    if (s1) {
+      wuffs_base__store_u24le__no_bounds_check(d + (1 * 3), s1);
+    }
+    uint32_t s2 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
+                                                          ((size_t)s[2] * 4));
+    if (s2) {
+      wuffs_base__store_u24le__no_bounds_check(d + (2 * 3), s2);
+    }
+    uint32_t s3 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
+                                                          ((size_t)s[3] * 4));
+    if (s3) {
+      wuffs_base__store_u24le__no_bounds_check(d + (3 * 3), s3);
+    }
+
+    s += loop_unroll_count * 1;
+    d += loop_unroll_count * 3;
+    n -= loop_unroll_count;
+  }
+
+  while (n >= 1) {  // Remaining 0..3 pixels, one at a time.
+    uint32_t s0 = wuffs_base__load_u32le__no_bounds_check(dst_palette.ptr +
+                                                          ((size_t)s[0] * 4));
+    if (s0) {
+      wuffs_base__store_u24le__no_bounds_check(d + (0 * 3), s0);
+    }
+
+    s += 1 * 1;
+    d += 1 * 3;
+    n -= 1;
+  }
+
+  return len;  // Counts skipped pixels too.
+}
+
+static uint64_t  // Returns the number of pixels written (1 src byte -> 3 dst bytes each).
+wuffs_base__pixel_swizzler__xxx__y(wuffs_base__slice_u8 dst,
+                                   wuffs_base__slice_u8 dst_palette,
+                                   wuffs_base__slice_u8 src) {
+  size_t dst_len3 = dst.len / 3;
+  size_t len = dst_len3 < src.len ? dst_len3 : src.len;  // Whole pixels that fit in both slices.
+  uint8_t* d = dst.ptr;
+  uint8_t* s = src.ptr;
+  size_t n = len;
+
+  // TODO: unroll.
+
+  while (n >= 1) {
+    uint8_t s0 = s[0];
+    d[0] = s0;  // The single source byte is replicated into all three dst bytes.
+    d[1] = s0;
+    d[2] = s0;
+
+    s += 1 * 1;
+    d += 1 * 3;
+    n -= 1;
+  }
+
+  return len;
+}
+
+// --------
+
+static uint64_t  // Returns the number of 4-byte pixels written, or 0 if dst_palette is not 1024 bytes.
+wuffs_base__pixel_swizzler__xxxx__index__src(wuffs_base__slice_u8 dst,
+                                             wuffs_base__slice_u8 dst_palette,
+                                             wuffs_base__slice_u8 src) {
+  if (dst_palette.len != 1024) {  // 256 entries * 4 bytes, so every uint8_t index below is in bounds.
+    return 0;
+  }
+  size_t dst_len4 = dst.len / 4;
+  size_t len = dst_len4 < src.len ? dst_len4 : src.len;  // Whole pixels that fit in both slices.
+  uint8_t* d = dst.ptr;
+  uint8_t* s = src.ptr;
+  size_t n = len;
+
+  const size_t loop_unroll_count = 4;
+
+  while (n >= loop_unroll_count) {  // Four pixels per iteration.
+    wuffs_base__store_u32le__no_bounds_check(
+        d + (0 * 4), wuffs_base__load_u32le__no_bounds_check(
+                         dst_palette.ptr + ((size_t)s[0] * 4)));  // Copy the whole 4-byte palette entry.
+    wuffs_base__store_u32le__no_bounds_check(
+        d + (1 * 4), wuffs_base__load_u32le__no_bounds_check(
+                         dst_palette.ptr + ((size_t)s[1] * 4)));
+    wuffs_base__store_u32le__no_bounds_check(
+        d + (2 * 4), wuffs_base__load_u32le__no_bounds_check(
+                         dst_palette.ptr + ((size_t)s[2] * 4)));
+    wuffs_base__store_u32le__no_bounds_check(
+        d + (3 * 4), wuffs_base__load_u32le__no_bounds_check(
+                         dst_palette.ptr + ((size_t)s[3] * 4)));
+
+    s += loop_unroll_count * 1;
+    d += loop_unroll_count * 4;
+    n -= loop_unroll_count;
+  }
+
+  while (n >= 1) {  // Remaining 0..3 pixels, one at a time.
+    wuffs_base__store_u32le__no_bounds_check(
+        d + (0 * 4), wuffs_base__load_u32le__no_bounds_check(
+                         dst_palette.ptr + ((size_t)s[0] * 4)));
+
+    s += 1 * 1;
+    d += 1 * 4;
+    n -= 1;
+  }
+
+  return len;
+}
+
+// For each 1-byte index in src, loads the 4-byte dst_palette entry and, only
+// if that entry is non-zero, stores it to dst; zero entries leave dst as is.
+// Returns 0 if dst_palette.len is not 1024, else min(dst.len / 4, src.len).
+static uint64_t  //
+wuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over(
+    wuffs_base__slice_u8 dst,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src) {
+  if (dst_palette.len != 1024) {
+    return 0;
+  }
+  size_t dst_pixels = dst.len / 4;
+  size_t num_pixels = (src.len > dst_pixels) ? dst_pixels : src.len;
+  uint8_t* pal = dst_palette.ptr;
+  uint8_t* out = dst.ptr;
+  uint8_t* in = src.ptr;
+  size_t remaining = num_pixels;
+
+  // Unrolled loop: four pixels per pass, each load just before its store.
+  for (; remaining >= 4; remaining -= 4) {
+    uint32_t c0 =
+        wuffs_base__load_u32le__no_bounds_check(pal + ((size_t)in[0] * 4));
+    if (c0 != 0) {
+      wuffs_base__store_u32le__no_bounds_check(out + 0, c0);
+    }
+    uint32_t c1 =
+        wuffs_base__load_u32le__no_bounds_check(pal + ((size_t)in[1] * 4));
+    if (c1 != 0) {
+      wuffs_base__store_u32le__no_bounds_check(out + 4, c1);
+    }
+    uint32_t c2 =
+        wuffs_base__load_u32le__no_bounds_check(pal + ((size_t)in[2] * 4));
+    if (c2 != 0) {
+      wuffs_base__store_u32le__no_bounds_check(out + 8, c2);
+    }
+    uint32_t c3 =
+        wuffs_base__load_u32le__no_bounds_check(pal + ((size_t)in[3] * 4));
+    if (c3 != 0) {
+      wuffs_base__store_u32le__no_bounds_check(out + 12, c3);
+    }
+    in += 4;
+    out += 16;
+  }
+
+  // Tail loop: the last 0 to 3 pixels, one at a time.
+  for (; remaining >= 1; remaining--) {
+    uint32_t c0 =
+        wuffs_base__load_u32le__no_bounds_check(pal + ((size_t)in[0] * 4));
+    if (c0 != 0) {
+      wuffs_base__store_u32le__no_bounds_check(out + 0, c0);
+    }
+    in += 1;
+    out += 4;
+  }
+
+  return num_pixels;
+}
+
+// Converts 3-byte src pixels to 4-byte dst pixels: each 32-bit value stored
+// is the value loaded from 3 src bytes with 0xFF000000 ORed in.
+// Returns the number of pixels converted: min(dst.len / 4, src.len / 3).
+static uint64_t  //
+wuffs_base__pixel_swizzler__xxxx__xxx(wuffs_base__slice_u8 dst,
+                                      wuffs_base__slice_u8 dst_palette,
+                                      wuffs_base__slice_u8 src) {
+  // dst_palette is not read by this function.
+  size_t dst_pixels = dst.len / 4;
+  size_t src_pixels = src.len / 3;
+  size_t num_pixels = (src_pixels > dst_pixels) ? dst_pixels : src_pixels;
+  uint8_t* out = dst.ptr;
+  uint8_t* in = src.ptr;
+
+  // TODO: unroll.
+
+  for (size_t i = 0; i < num_pixels; i++) {
+    wuffs_base__store_u32le__no_bounds_check(
+        out, 0xFF000000 | wuffs_base__load_u24le__no_bounds_check(in));
+    in += 3;
+    out += 4;
+  }
+
+  return num_pixels;
+}
+
+// Converts 1-byte src pixels to 4-byte dst pixels: for a src byte y, the
+// 32-bit value stored is 0xFF000000 | (0x010101 * y).
+// Returns the number of pixels converted: min(dst.len / 4, src.len).
+static uint64_t  //
+wuffs_base__pixel_swizzler__xxxx__y(wuffs_base__slice_u8 dst,
+                                    wuffs_base__slice_u8 dst_palette,
+                                    wuffs_base__slice_u8 src) {
+  size_t dst_pixels = dst.len / 4;
+  size_t num_pixels = (src.len > dst_pixels) ? dst_pixels : src.len;
+  uint8_t* out = dst.ptr;
+  uint8_t* in = src.ptr;
+
+  // TODO: unroll.
+
+  for (size_t i = 0; i < num_pixels; i++) {
+    // Multiplying by 0x010101 repeats y in each of the three low bytes.
+    uint32_t y = in[i];
+    wuffs_base__store_u32le__no_bounds_check(out, 0xFF000000 | (0x010101 * y));
+    out += 4;
+  }
+
+  return num_pixels;
+}
+
+// --------
+
+// Returns the conversion function for a Y source, chosen from dst_format
+// alone (p, both palettes and blend are unused), or NULL if unsupported.
+static wuffs_base__pixel_swizzler__func  //
+wuffs_base__pixel_swizzler__prepare__y(wuffs_base__pixel_swizzler* p,
+                                       wuffs_base__pixel_format dst_format,
+                                       wuffs_base__slice_u8 dst_palette,
+                                       wuffs_base__slice_u8 src_palette,
+                                       wuffs_base__pixel_blend blend) {
+  switch (dst_format.repr) {
+    case WUFFS_BASE__PIXEL_FORMAT__RGBX:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRX:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
+      return wuffs_base__pixel_swizzler__xxxx__y;
+    case WUFFS_BASE__PIXEL_FORMAT__RGB:
+    case WUFFS_BASE__PIXEL_FORMAT__BGR:
+      return wuffs_base__pixel_swizzler__xxx__y;
+    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
+      return wuffs_base__pixel_swizzler__bgr_565__y;
+  }
+  return NULL;
+}
+
+// Returns the conversion function for an indexed source, or NULL if the
+// dst_format or blend is unsupported. For every dst_format handled here,
+// dst_palette is filled from src_palette (by a copy, squash or swap helper
+// that must report 1024) before blend is examined.
+static wuffs_base__pixel_swizzler__func  //
+wuffs_base__pixel_swizzler__prepare__indexed__bgra_binary(
+    wuffs_base__pixel_swizzler* p,
+    wuffs_base__pixel_format dst_format,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src_palette,
+    wuffs_base__pixel_blend blend) {
+  switch (dst_format.repr) {
+    // Only the SRC blend is handled for indexed destinations.
+    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_NONPREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_PREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY:
+      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=
+          1024) {
+        return NULL;
+      }
+      if (blend == WUFFS_BASE__PIXEL_BLEND__SRC) {
+        return wuffs_base__pixel_swizzler__copy_1_1;
+      }
+      return NULL;
+
+    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
+      if (wuffs_base__pixel_swizzler__squash_bgr_565_888(dst_palette,
+                                                         src_palette) != 1024) {
+        return NULL;
+      }
+      if (blend == WUFFS_BASE__PIXEL_BLEND__SRC) {
+        return wuffs_base__pixel_swizzler__bgr_565__index__src;
+      } else if (blend == WUFFS_BASE__PIXEL_BLEND__SRC_OVER) {
+        return wuffs_base__pixel_swizzler__bgr_565__index_binary_alpha__src_over;
+      }
+      return NULL;
+
+    case WUFFS_BASE__PIXEL_FORMAT__BGR:
+      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=
+          1024) {
+        return NULL;
+      }
+      if (blend == WUFFS_BASE__PIXEL_BLEND__SRC) {
+        return wuffs_base__pixel_swizzler__xxx__index__src;
+      } else if (blend == WUFFS_BASE__PIXEL_BLEND__SRC_OVER) {
+        return wuffs_base__pixel_swizzler__xxx__index_binary_alpha__src_over;
+      }
+      return NULL;
+
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:
+      if (wuffs_base__slice_u8__copy_from_slice(dst_palette, src_palette) !=
+          1024) {
+        return NULL;
+      }
+      if (blend == WUFFS_BASE__PIXEL_BLEND__SRC) {
+        return wuffs_base__pixel_swizzler__xxxx__index__src;
+      } else if (blend == WUFFS_BASE__PIXEL_BLEND__SRC_OVER) {
+        return wuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over;
+      }
+      return NULL;
+
+    // The RGB cases differ from the BGR ones only in how dst_palette is filled.
+    case WUFFS_BASE__PIXEL_FORMAT__RGB:
+      if (wuffs_base__pixel_swizzler__swap_rgbx_bgrx(dst_palette,
+                                                     src_palette) != 1024) {
+        return NULL;
+      }
+      if (blend == WUFFS_BASE__PIXEL_BLEND__SRC) {
+        return wuffs_base__pixel_swizzler__xxx__index__src;
+      } else if (blend == WUFFS_BASE__PIXEL_BLEND__SRC_OVER) {
+        return wuffs_base__pixel_swizzler__xxx__index_binary_alpha__src_over;
+      }
+      return NULL;
+
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:
+      if (wuffs_base__pixel_swizzler__swap_rgbx_bgrx(dst_palette,
+                                                     src_palette) != 1024) {
+        return NULL;
+      }
+      if (blend == WUFFS_BASE__PIXEL_BLEND__SRC) {
+        return wuffs_base__pixel_swizzler__xxxx__index__src;
+      } else if (blend == WUFFS_BASE__PIXEL_BLEND__SRC_OVER) {
+        return wuffs_base__pixel_swizzler__xxxx__index_binary_alpha__src_over;
+      }
+      return NULL;
+  }
+  return NULL;
+}
+
+// Returns the conversion function for a BGR source, chosen from dst_format
+// alone (p, both palettes and blend are unused). Returns NULL if that
+// destination is unsupported or is one of the cases still marked TODO.
+static wuffs_base__pixel_swizzler__func  //
+wuffs_base__pixel_swizzler__prepare__bgr(wuffs_base__pixel_swizzler* p,
+                                         wuffs_base__pixel_format dst_format,
+                                         wuffs_base__slice_u8 dst_palette,
+                                         wuffs_base__slice_u8 src_palette,
+                                         wuffs_base__pixel_blend blend) {
+  switch (dst_format.repr) {
+    case WUFFS_BASE__PIXEL_FORMAT__BGRX:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
+      return wuffs_base__pixel_swizzler__xxxx__xxx;
+    case WUFFS_BASE__PIXEL_FORMAT__BGR:
+      return wuffs_base__pixel_swizzler__copy_3_3;
+    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
+      return wuffs_base__pixel_swizzler__bgr_565__bgr;
+    case WUFFS_BASE__PIXEL_FORMAT__RGBX:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__RGB:
+      // TODO.
+      return NULL;
+  }
+  return NULL;
+}
+
+// Returns the conversion function for a BGRA_NONPREMUL source, chosen from
+// dst_format and blend, or NULL if that pair is unsupported. The cases
+// marked TODO are listed but not implemented, so they also return NULL.
+static wuffs_base__pixel_swizzler__func  //
+wuffs_base__pixel_swizzler__prepare__bgra_nonpremul(
+    wuffs_base__pixel_swizzler* p,
+    wuffs_base__pixel_format dst_format,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src_palette,
+    wuffs_base__pixel_blend blend) {
+  switch (dst_format.repr) {
+    case WUFFS_BASE__PIXEL_FORMAT__BGR_565:
+      if (blend == WUFFS_BASE__PIXEL_BLEND__SRC) {
+        return wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src;
+      } else if (blend == WUFFS_BASE__PIXEL_BLEND__SRC_OVER) {
+        return wuffs_base__pixel_swizzler__bgr_565__bgra_nonpremul__src_over;
+      }
+      return NULL;
+
+    case WUFFS_BASE__PIXEL_FORMAT__BGR:
+      if (blend == WUFFS_BASE__PIXEL_BLEND__SRC) {
+        return wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src;
+      } else if (blend == WUFFS_BASE__PIXEL_BLEND__SRC_OVER) {
+        return wuffs_base__pixel_swizzler__bgr__bgra_nonpremul__src_over;
+      }
+      return NULL;
+
+    // Source and destination formats match, so SRC uses the copy_4_4 function.
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
+      if (blend == WUFFS_BASE__PIXEL_BLEND__SRC) {
+        return wuffs_base__pixel_swizzler__copy_4_4;
+      } else if (blend == WUFFS_BASE__PIXEL_BLEND__SRC_OVER) {
+        return wuffs_base__pixel_swizzler__bgra_nonpremul__bgra_nonpremul__src_over;
+      }
+      return NULL;
+
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
+      if (blend == WUFFS_BASE__PIXEL_BLEND__SRC) {
+        return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src;
+      } else if (blend == WUFFS_BASE__PIXEL_BLEND__SRC_OVER) {
+        return wuffs_base__pixel_swizzler__bgra_premul__bgra_nonpremul__src_over;
+      }
+      return NULL;
+
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:
+    case WUFFS_BASE__PIXEL_FORMAT__BGRX:
+      // TODO.
+      break;
+
+    case WUFFS_BASE__PIXEL_FORMAT__RGB:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:
+    case WUFFS_BASE__PIXEL_FORMAT__RGBX:
+      // TODO.
+      break;
+  }
+  return NULL;
+}
+
+// --------
+
+// Picks the conversion function for src_format, dst_format and blend, and
+// stores it in p (NULL if unsupported). A NULL p or no function is an error.
+wuffs_base__status  //
+wuffs_base__pixel_swizzler__prepare(wuffs_base__pixel_swizzler* p,
+                                    wuffs_base__pixel_format dst_format,
+                                    wuffs_base__slice_u8 dst_palette,
+                                    wuffs_base__pixel_format src_format,
+                                    wuffs_base__slice_u8 src_palette,
+                                    wuffs_base__pixel_blend blend) {
+  if (p == NULL) {
+    return wuffs_base__make_status(wuffs_base__error__bad_receiver);
+  }
+
+  // TODO: support many more formats.
+  wuffs_base__pixel_swizzler__func fn = NULL;
+  switch (src_format.repr) {
+    case WUFFS_BASE__PIXEL_FORMAT__Y:
+      fn = wuffs_base__pixel_swizzler__prepare__y(
+          p, dst_format, dst_palette, src_palette, blend);
+      break;
+    case WUFFS_BASE__PIXEL_FORMAT__INDEXED__BGRA_BINARY:
+      fn = wuffs_base__pixel_swizzler__prepare__indexed__bgra_binary(
+          p, dst_format, dst_palette, src_palette, blend);
+      break;
+    case WUFFS_BASE__PIXEL_FORMAT__BGR:
+      fn = wuffs_base__pixel_swizzler__prepare__bgr(
+          p, dst_format, dst_palette, src_palette, blend);
+      break;
+    case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
+      fn = wuffs_base__pixel_swizzler__prepare__bgra_nonpremul(
+          p, dst_format, dst_palette, src_palette, blend);
+      break;
+  }
+
+  p->private_impl.func = fn;
+  if (fn == NULL) {
+    return wuffs_base__make_status(
+        wuffs_base__error__unsupported_pixel_swizzler_option);
+  }
+  return wuffs_base__make_status(NULL);
+}
+
+// Calls the function stored in p with dst, dst_palette and src, returning
+// its result. A NULL p, or a p with no function stored, yields 0.
+uint64_t  //
+wuffs_base__pixel_swizzler__swizzle_interleaved(
+    const wuffs_base__pixel_swizzler* p,
+    wuffs_base__slice_u8 dst,
+    wuffs_base__slice_u8 dst_palette,
+    wuffs_base__slice_u8 src) {
+  wuffs_base__pixel_swizzler__func fn = p ? p->private_impl.func : NULL;
+  return fn ? (*fn)(dst, dst_palette, src) : 0;
 }
 
 #endif  // !defined(WUFFS_CONFIG__MODULES) ||