std/etc2: support 11-bit source formats
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 5d20ca2..dcdaf13 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -9384,7 +9384,6 @@
 
 extern const char wuffs_etc2__error__bad_header[];
 extern const char wuffs_etc2__error__truncated_input[];
-extern const char wuffs_etc2__error__unsupported_etc2_file[];
 
 // ---------------- Public Consts
 
@@ -9570,6 +9569,8 @@
     uint32_t p_decode_frame;
     uint32_t p_do_decode_frame;
     uint32_t p_from_src_to_colors;
+    wuffs_base__empty_struct (*choosy_from_colors_to_buffer)(
+        wuffs_etc2__decoder* self);
   } private_impl;
 
   struct {
@@ -43577,7 +43578,6 @@
 
 const char wuffs_etc2__error__bad_header[] = "#etc2: bad header";
 const char wuffs_etc2__error__truncated_input[] = "#etc2: truncated input";
-const char wuffs_etc2__error__unsupported_etc2_file[] = "#etc2: unsupported ETC2 file";
 
 // ---------------- Private Consts
 
@@ -43840,6 +43840,11 @@
 
 WUFFS_BASE__GENERATED_C_CODE
 static wuffs_base__empty_struct
+wuffs_etc2__decoder__from_colors_to_buffer__choosy_default(
+    wuffs_etc2__decoder* self);
+
+WUFFS_BASE__GENERATED_C_CODE
+static wuffs_base__empty_struct
 wuffs_etc2__decoder__decode_t_mode(
     wuffs_etc2__decoder* self,
     uint64_t a_bits,
@@ -43881,6 +43886,42 @@
     wuffs_etc2__decoder* self);
 
 WUFFS_BASE__GENERATED_C_CODE
+static wuffs_base__empty_struct
+wuffs_etc2__decoder__from_colors_to_buffer_r11u(  // Calls the unsigned helper once (f_colors, 2 bytes per pixel).
+    wuffs_etc2__decoder* self);
+
+WUFFS_BASE__GENERATED_C_CODE
+static wuffs_base__empty_struct
+wuffs_etc2__decoder__from_colors_to_buffer_r11s(  // Calls the signed helper once (f_colors, 2 bytes per pixel).
+    wuffs_etc2__decoder* self);
+
+WUFFS_BASE__GENERATED_C_CODE
+static wuffs_base__empty_struct
+wuffs_etc2__decoder__from_colors_to_buffer_rg11u(  // Calls the unsigned helper twice (f_alphas, then f_colors; 8 bytes per pixel).
+    wuffs_etc2__decoder* self);
+
+WUFFS_BASE__GENERATED_C_CODE
+static wuffs_base__empty_struct
+wuffs_etc2__decoder__from_colors_to_buffer_rg11s(  // Calls the signed helper twice (f_alphas, then f_colors; 8 bytes per pixel).
+    wuffs_etc2__decoder* self);
+
+WUFFS_BASE__GENERATED_C_CODE
+static wuffs_base__empty_struct
+wuffs_etc2__decoder__from_colors_to_buffer_unsigned(  // Writes one 16-bit little-endian sample per pixel into f_buffer.
+    wuffs_etc2__decoder* self,
+    uint32_t a_input,  // 0 reads blocks from f_alphas; any other value reads from f_colors.
+    uint32_t a_dst_bytes_per_pixel,  // Byte distance between horizontally adjacent pixels in f_buffer.
+    uint32_t a_offset_adjustment);  // Extra byte offset added to every write position.
+
+WUFFS_BASE__GENERATED_C_CODE
+static wuffs_base__empty_struct
+wuffs_etc2__decoder__from_colors_to_buffer_signed(  // Signed-base counterpart of the _unsigned helper; same parameters.
+    wuffs_etc2__decoder* self,
+    uint32_t a_input,  // 0 reads blocks from f_alphas; any other value reads from f_colors.
+    uint32_t a_dst_bytes_per_pixel,  // Byte distance between horizontally adjacent pixels in f_buffer.
+    uint32_t a_offset_adjustment);  // Extra byte offset added to every write position.
+
+WUFFS_BASE__GENERATED_C_CODE
 static wuffs_base__status
 wuffs_etc2__decoder__from_buffer_to_dst(
     wuffs_etc2__decoder* self,
@@ -43965,6 +44006,8 @@
     }
   }
 
+  self->private_impl.choosy_from_colors_to_buffer = &wuffs_etc2__decoder__from_colors_to_buffer__choosy_default;
+
   self->private_impl.magic = WUFFS_BASE__MAGIC;
   self->private_impl.vtable_for__wuffs_base__image_decoder.vtable_name =
       wuffs_base__image_decoder__vtable_name;
@@ -44111,6 +44154,7 @@
   wuffs_base__status status = wuffs_base__make_status(NULL);
 
   uint32_t v_c32 = 0;
+  uint32_t v_i = 0;
   uint16_t v_rounded_up_width = 0;
   uint16_t v_rounded_up_height = 0;
 
@@ -44205,14 +44249,37 @@
       self->private_impl.f_pixfmt = 2164295816u;
     } else if ((v_c32 == 67121202u) || (v_c32 == 184561714u)) {
       self->private_impl.f_pixfmt = 2197850248u;
-    } else if ((v_c32 & 65535u) == 12338u) {
-      status = wuffs_base__make_status(wuffs_etc2__error__unsupported_etc2_file);
-      goto exit;
+    } else if (v_c32 == 83898418u) {
+      self->private_impl.f_pixfmt = 536870923u;
+      self->private_impl.choosy_from_colors_to_buffer = (
+          &wuffs_etc2__decoder__from_colors_to_buffer_r11u);
+    } else if (v_c32 == 100675634u) {
+      self->private_impl.f_pixfmt = 2164308923u;
+      self->private_impl.choosy_from_colors_to_buffer = (
+          &wuffs_etc2__decoder__from_colors_to_buffer_rg11u);
+    } else if (v_c32 == 117452850u) {
+      self->private_impl.f_pixfmt = 536870923u;
+      self->private_impl.choosy_from_colors_to_buffer = (
+          &wuffs_etc2__decoder__from_colors_to_buffer_r11s);
+    } else if (v_c32 == 134230066u) {
+      self->private_impl.f_pixfmt = 2164308923u;
+      self->private_impl.choosy_from_colors_to_buffer = (
+          &wuffs_etc2__decoder__from_colors_to_buffer_rg11s);
     } else {
       status = wuffs_base__make_status(wuffs_etc2__error__bad_header);
       goto exit;
     }
     self->private_impl.f_srgb = ((v_c32 >> 24u) >= 9u);
+    if (self->private_impl.f_pixfmt == 2164308923u) {
+      v_i = 0u;
+      while (v_i <= 4088u) {
+        self->private_data.f_buffer[(v_i + 0u)] = 0u;
+        self->private_data.f_buffer[(v_i + 1u)] = 0u;
+        self->private_data.f_buffer[(v_i + 6u)] = 255u;
+        self->private_data.f_buffer[(v_i + 7u)] = 255u;
+        v_i += 8u;
+      }
+    }
     {
       WUFFS_BASE__COROUTINE_SUSPENSION_POINT(5);
       uint16_t t_2;
@@ -44616,6 +44683,7 @@
   wuffs_base__status status = wuffs_base__make_status(NULL);
 
   uint32_t v_remaining = 0;
+  uint32_t v_max_nbb = 0;
   wuffs_base__status v_status = wuffs_base__make_status(NULL);
 
   uint32_t coro_susp_point = self->private_impl.p_do_decode_frame;
@@ -44656,7 +44724,11 @@
     self->private_impl.f_dst_y = 0u;
     v_remaining = (((self->private_impl.f_width + 3u) / 4u) * ((self->private_impl.f_height + 3u) / 4u));
     while (v_remaining > 0u) {
-      self->private_impl.f_num_buffered_blocks = wuffs_base__u32__min(v_remaining, 64u);
+      v_max_nbb = 64u;
+      if ((self->private_impl.f_pixfmt == 536870923u) || (self->private_impl.f_pixfmt == 2164308923u)) {
+        v_max_nbb = 32u;
+      }
+      self->private_impl.f_num_buffered_blocks = wuffs_base__u32__min(v_remaining, v_max_nbb);
       if (v_remaining < self->private_impl.f_num_buffered_blocks) {
         status = wuffs_base__make_status(wuffs_base__error__too_much_data);
         goto exit;
@@ -44730,7 +44802,7 @@
     WUFFS_BASE__COROUTINE_SUSPENSION_POINT_0;
 
     while (v_bi < self->private_impl.f_num_buffered_blocks) {
-      if (self->private_impl.f_pixfmt == 2164295816u) {
+      if ((self->private_impl.f_pixfmt == 2164295816u) || (self->private_impl.f_pixfmt == 2164308923u)) {
         {
           WUFFS_BASE__COROUTINE_SUSPENSION_POINT(1);
           uint64_t t_0;
@@ -44819,6 +44891,13 @@
 static wuffs_base__empty_struct
 wuffs_etc2__decoder__from_colors_to_buffer(
     wuffs_etc2__decoder* self) {
+  return (*self->private_impl.choosy_from_colors_to_buffer)(self);  // Forward to whichever implementation the choosy_from_colors_to_buffer pointer currently holds.
+}
+
+WUFFS_BASE__GENERATED_C_CODE
+static wuffs_base__empty_struct
+wuffs_etc2__decoder__from_colors_to_buffer__choosy_default(
+    wuffs_etc2__decoder* self) {
   uint32_t v_bi = 0;
   uint64_t v_color = 0;
   uint32_t v_r0 = 0;
@@ -45226,6 +45305,190 @@
   return wuffs_base__make_empty_struct();
 }
 
+// -------- func etc2.decoder.from_colors_to_buffer_r11u
+
+WUFFS_BASE__GENERATED_C_CODE
+static wuffs_base__empty_struct
+wuffs_etc2__decoder__from_colors_to_buffer_r11u(
+    wuffs_etc2__decoder* self) {  // Single-channel unsigned case: one pass of the unsigned helper.
+  wuffs_etc2__decoder__from_colors_to_buffer_unsigned(self, 1u, 2u, 0u);  // a_input=1 (f_colors), 2 bytes per pixel, no extra byte offset.
+  return wuffs_base__make_empty_struct();
+}
+
+// -------- func etc2.decoder.from_colors_to_buffer_r11s
+
+WUFFS_BASE__GENERATED_C_CODE
+static wuffs_base__empty_struct
+wuffs_etc2__decoder__from_colors_to_buffer_r11s(
+    wuffs_etc2__decoder* self) {  // Single-channel signed case: one pass of the signed helper.
+  wuffs_etc2__decoder__from_colors_to_buffer_signed(self, 1u, 2u, 0u);  // a_input=1 (f_colors), 2 bytes per pixel, no extra byte offset.
+  return wuffs_base__make_empty_struct();
+}
+
+// -------- func etc2.decoder.from_colors_to_buffer_rg11u
+
+WUFFS_BASE__GENERATED_C_CODE
+static wuffs_base__empty_struct
+wuffs_etc2__decoder__from_colors_to_buffer_rg11u(
+    wuffs_etc2__decoder* self) {  // Two-channel unsigned case: two passes over 8-byte pixels.
+  wuffs_etc2__decoder__from_colors_to_buffer_unsigned(self, 0u, 8u, 4u);  // f_alphas blocks land at bytes 4..5 of each 8-byte pixel.
+  wuffs_etc2__decoder__from_colors_to_buffer_unsigned(self, 1u, 8u, 2u);  // f_colors blocks land at bytes 2..3 of each 8-byte pixel.
+  return wuffs_base__make_empty_struct();
+}
+
+// -------- func etc2.decoder.from_colors_to_buffer_rg11s
+
+WUFFS_BASE__GENERATED_C_CODE
+static wuffs_base__empty_struct
+wuffs_etc2__decoder__from_colors_to_buffer_rg11s(
+    wuffs_etc2__decoder* self) {  // Two-channel signed case: two passes over 8-byte pixels.
+  wuffs_etc2__decoder__from_colors_to_buffer_signed(self, 0u, 8u, 4u);  // f_alphas blocks land at bytes 4..5 of each 8-byte pixel.
+  wuffs_etc2__decoder__from_colors_to_buffer_signed(self, 1u, 8u, 2u);  // f_colors blocks land at bytes 2..3 of each 8-byte pixel.
+  return wuffs_base__make_empty_struct();
+}
+
+// -------- func etc2.decoder.from_colors_to_buffer_unsigned
+
+WUFFS_BASE__GENERATED_C_CODE
+static wuffs_base__empty_struct
+wuffs_etc2__decoder__from_colors_to_buffer_unsigned(  // Expands each buffered 64-bit block into 4x4 16-bit little-endian samples in f_buffer.
+    wuffs_etc2__decoder* self,
+    uint32_t a_input,  // 0 reads blocks from f_alphas; any other value reads from f_colors.
+    uint32_t a_dst_bytes_per_pixel,  // Byte distance between horizontally adjacent pixels in f_buffer.
+    uint32_t a_offset_adjustment) {  // Extra byte offset added to every write position.
+  uint32_t v_nbb = 0;
+  uint32_t v_bi = 0;
+  uint64_t v_color = 0;
+  uint32_t v_c0 = 0;
+  uint32_t v_multiplier = 0;
+  uint32_t v_which = 0;
+  uint32_t v_offset = 0;
+  uint32_t v_y = 0;
+  uint32_t v_x = 0;
+  uint32_t v_shift = 0;
+  uint32_t v_delta = 0;
+  uint32_t v_c11 = 0;
+  uint32_t v_c16 = 0;
+  uint32_t v_o = 0;
+
+  v_nbb = wuffs_base__u32__min(self->private_impl.f_num_buffered_blocks, 32u);  // Never process more than 32 blocks per call.
+  while (v_bi < v_nbb) {
+    if (a_input == 0u) {
+      v_color = self->private_data.f_alphas[v_bi];
+    } else {
+      v_color = self->private_data.f_colors[v_bi];
+    }
+    v_c0 = ((((uint32_t)((v_color >> 56u))) * 8u) + 4u);  // Base value: top byte of the block, times 8, plus 4.
+    v_multiplier = ((uint32_t)(((v_color >> 52u) & 15u)));  // Bits 52..55.
+    v_multiplier *= 8u;
+    if (v_multiplier == 0u) {
+      v_multiplier = 1u;  // A zero multiplier field scales the modifier by 1 instead of 0.
+    }
+    v_which = ((uint32_t)(((v_color >> 48u) & 15u)));  // Bits 48..51 select the WUFFS_ETC2__ALPHA_MODIFIERS row.
+    v_offset = ((4u * a_dst_bytes_per_pixel * v_bi) + a_offset_adjustment);  // Each block is 4 pixels wide within a buffer row.
+    v_y = 0u;
+    while (v_y < 4u) {
+      v_x = 0u;
+      while (v_x < 4u) {
+        v_shift = ((((v_x ^ 3u) * 4u) | (v_y ^ 3u)) * 3u);  // 3-bit selector per pixel; (x=0, y=0) uses the highest shift, 45.
+        v_delta = ((uint32_t)(v_multiplier * WUFFS_ETC2__ALPHA_MODIFIERS[v_which][(7u & (v_color >> v_shift))]));  // Wrapping 32-bit multiply.
+        v_c11 = ((uint32_t)(v_c0 + v_delta));  // Wrapping 32-bit add.
+        if (v_c11 >= 2147483648u) {  // High bit set: treated as a negative result and clamped to 0.
+          v_c11 = 0u;
+        } else if (v_c11 > 2047u) {  // Clamp to the 11-bit maximum.
+          v_c11 = 2047u;
+        }
+        v_c16 = ((v_c11 << 5u) | (v_c11 >> 6u));  // Widen 11 bits to 16 by repeating the top 5 bits in the low bits.
+        v_o = (v_offset + (v_x * a_dst_bytes_per_pixel) + (v_y * 1024u));  // Consecutive pixel rows are 1024 bytes apart in f_buffer.
+        self->private_data.f_buffer[(v_o + 0u)] = ((uint8_t)((v_c16 >> 0u)));  // Low byte first (little-endian).
+        self->private_data.f_buffer[(v_o + 1u)] = ((uint8_t)((v_c16 >> 8u)));
+        v_x += 1u;
+      }
+      v_y += 1u;
+    }
+    v_bi += 1u;
+  }
+  return wuffs_base__make_empty_struct();
+}
+
+// -------- func etc2.decoder.from_colors_to_buffer_signed
+
+WUFFS_BASE__GENERATED_C_CODE
+static wuffs_base__empty_struct
+wuffs_etc2__decoder__from_colors_to_buffer_signed(  // Signed-base variant: expands each buffered 64-bit block into 4x4 16-bit little-endian samples in f_buffer.
+    wuffs_etc2__decoder* self,
+    uint32_t a_input,  // 0 reads blocks from f_alphas; any other value reads from f_colors.
+    uint32_t a_dst_bytes_per_pixel,  // Byte distance between horizontally adjacent pixels in f_buffer.
+    uint32_t a_offset_adjustment) {  // Extra byte offset added to every write position.
+  uint32_t v_nbb = 0;
+  uint32_t v_bi = 0;
+  uint64_t v_color = 0;
+  uint32_t v_c0 = 0;
+  uint32_t v_multiplier = 0;
+  uint32_t v_which = 0;
+  uint32_t v_offset = 0;
+  uint32_t v_y = 0;
+  uint32_t v_x = 0;
+  uint32_t v_shift = 0;
+  uint32_t v_delta = 0;
+  uint32_t v_c11 = 0;
+  uint32_t v_c16 = 0;
+  uint32_t v_o = 0;
+
+  v_nbb = wuffs_base__u32__min(self->private_impl.f_num_buffered_blocks, 32u);  // Never process more than 32 blocks per call.
+  while (v_bi < v_nbb) {
+    if (a_input == 0u) {
+      v_color = self->private_data.f_alphas[v_bi];
+    } else {
+      v_color = self->private_data.f_colors[v_bi];
+    }
+    v_c0 = (((uint32_t)((v_color >> 56u))) * 8u);  // Top byte of the block, times 8; reinterpreted as signed below.
+    if (v_c0 < 1024u) {  // Top byte 0x00..0x7F: non-negative, keep as is.
+    } else if (v_c0 == 1024u) {  // Top byte 0x80.
+      v_c0 = 4294966280u;  // 0xFFFFFC08, i.e. -1016 (-127 * 8) in 32-bit two's complement.
+    } else {
+      v_c0 -= 2048u;  // Top byte 0x81..0xFF: subtract 256 * 8, wrapping to a negative two's complement value.
+    }
+    v_multiplier = ((uint32_t)(((v_color >> 52u) & 15u)));  // Bits 52..55.
+    v_multiplier *= 8u;
+    if (v_multiplier == 0u) {
+      v_multiplier = 1u;  // A zero multiplier field scales the modifier by 1 instead of 0.
+    }
+    v_which = ((uint32_t)(((v_color >> 48u) & 15u)));  // Bits 48..51 select the WUFFS_ETC2__ALPHA_MODIFIERS row.
+    v_offset = ((4u * a_dst_bytes_per_pixel * v_bi) + a_offset_adjustment);  // Each block is 4 pixels wide within a buffer row.
+    v_y = 0u;
+    while (v_y < 4u) {
+      v_x = 0u;
+      while (v_x < 4u) {
+        v_shift = ((((v_x ^ 3u) * 4u) | (v_y ^ 3u)) * 3u);  // 3-bit selector per pixel; (x=0, y=0) uses the highest shift, 45.
+        v_delta = ((uint32_t)(v_multiplier * WUFFS_ETC2__ALPHA_MODIFIERS[v_which][(7u & (v_color >> v_shift))]));  // Wrapping 32-bit multiply.
+        v_c11 = ((uint32_t)(v_c0 + v_delta));  // Wrapping 32-bit add.
+        if (v_c11 <= 1023u) {  // Already within 0..+1023.
+        } else if (v_c11 < 2147483648u) {  // Positive and too large: clamp to +1023.
+          v_c11 = 1023u;
+        } else if (v_c11 < 4294966273u) {  // Below -1023 (0xFFFFFC01): clamp to -1023.
+          v_c11 = 4294966273u;
+        }
+        if (v_c11 < 2147483648u) {  // Non-negative: widen the magnitude directly.
+          v_c16 = (((uint32_t)(v_c11 << 5u)) | (v_c11 >> 5u));
+        } else {  // Negative: negate, widen the magnitude, then negate the result.
+          v_c11 = ((uint32_t)(0u - v_c11));
+          v_c16 = (((uint32_t)(v_c11 << 5u)) | (v_c11 >> 5u));
+          v_c16 = ((uint32_t)(0u - v_c16));
+        }
+        v_c16 ^= 32768u;  // Flip bit 15 (0x8000) before storing the low 16 bits.
+        v_o = (v_offset + (v_x * a_dst_bytes_per_pixel) + (v_y * 1024u));  // Consecutive pixel rows are 1024 bytes apart in f_buffer.
+        self->private_data.f_buffer[(v_o + 0u)] = ((uint8_t)((v_c16 >> 0u)));  // Low byte first (little-endian).
+        self->private_data.f_buffer[(v_o + 1u)] = ((uint8_t)((v_c16 >> 8u)));
+        v_x += 1u;
+      }
+      v_y += 1u;
+    }
+    v_bi += 1u;
+  }
+  return wuffs_base__make_empty_struct();
+}
+
 // -------- func etc2.decoder.from_buffer_to_dst
 
 WUFFS_BASE__GENERATED_C_CODE
@@ -45233,6 +45496,7 @@
 wuffs_etc2__decoder__from_buffer_to_dst(
     wuffs_etc2__decoder* self,
     wuffs_base__pixel_buffer* a_dst) {
+  uint32_t v_src_bytes_per_pixel = 0;
   wuffs_base__pixel_format v_dst_pixfmt = {0};
   uint32_t v_dst_bits_per_pixel = 0;
   uint32_t v_dst_bytes_per_pixel = 0;
@@ -45248,6 +45512,13 @@
   uint64_t v_i = 0;
   uint32_t v_num_src_pixels = 0;
 
+  if (self->private_impl.f_pixfmt == 536870923u) {
+    v_src_bytes_per_pixel = 2u;
+  } else if (self->private_impl.f_pixfmt == 2164308923u) {
+    v_src_bytes_per_pixel = 8u;
+  } else {
+    v_src_bytes_per_pixel = 4u;
+  }
   v_dst_pixfmt = wuffs_base__pixel_buffer__pixel_format(a_dst);
   v_dst_bits_per_pixel = wuffs_base__pixel_format__bits_per_pixel(&v_dst_pixfmt);
   if ((v_dst_bits_per_pixel & 7u) != 0u) {
@@ -45269,13 +45540,21 @@
     }
     v_dy = 0u;
     while (v_dy < 4u) {
-      v_si = ((1024u * v_dy) + (16u * v_bi));
-      v_sj = ((1024u * v_dy) + 1024u);
+      if (v_src_bytes_per_pixel == 4u) {
+        v_si = ((1024u * v_dy) + (16u * v_bi));
+        v_sj = ((1024u * v_dy) + 1024u);
+      } else if (v_src_bytes_per_pixel < 4u) {
+        v_si = ((1024u * v_dy) + (8u * v_bi));
+        v_sj = ((1024u * v_dy) + 256u);
+      } else {
+        v_si = ((1024u * v_dy) + (32u * (v_bi & 31u)));
+        v_sj = ((1024u * v_dy) + 1024u);
+      }
       if (v_si < v_sj) {
         v_src = wuffs_base__make_slice_u8_ij(self->private_data.f_buffer, v_si, v_sj);
       }
-      if (((uint64_t)((4u * v_rem_x))) < ((uint64_t)(v_src.len))) {
-        v_src = wuffs_base__slice_u8__subslice_j(v_src, ((uint64_t)((4u * v_rem_x))));
+      if (((uint64_t)((v_src_bytes_per_pixel * v_rem_x))) < ((uint64_t)(v_src.len))) {
+        v_src = wuffs_base__slice_u8__subslice_j(v_src, ((uint64_t)((v_src_bytes_per_pixel * v_rem_x))));
       }
       if (((uint32_t)(self->private_impl.f_dst_y + v_dy)) >= self->private_impl.f_height) {
         break;
@@ -45290,9 +45569,9 @@
       }
       v_dy += 1u;
     }
-    v_num_src_pixels = (((uint32_t)(((uint64_t)(v_src.len)))) / 4u);
+    v_num_src_pixels = (((uint32_t)(((uint64_t)(v_src.len)))) / v_src_bytes_per_pixel);
     self->private_impl.f_dst_x += v_num_src_pixels;
-    v_bi += ((v_num_src_pixels + 3u) / 4u);
+    v_bi += (((uint32_t)(v_num_src_pixels + 3u)) / 4u);
   }
   return wuffs_base__make_status(NULL);
 }
diff --git a/std/etc2/decode_etc2.wuffs b/std/etc2/decode_etc2.wuffs
index 27d241f..db417cc 100644
--- a/std/etc2/decode_etc2.wuffs
+++ b/std/etc2/decode_etc2.wuffs
@@ -41,7 +41,6 @@
 
 pub status "#bad header"
 pub status "#truncated input"
-pub status "#unsupported ETC2 file"
 
 pub const DECODER_WORKBUF_LEN_MAX_INCL_WORST_CASE : base.u64 = 0
 
@@ -93,6 +92,7 @@
 
 pri func decoder.do_decode_image_config?(dst: nptr base.image_config, src: base.io_reader) {
     var c32               : base.u32
+    var i                 : base.u32
     var rounded_up_width  : base.u16
     var rounded_up_height : base.u16
 
@@ -116,13 +116,34 @@
     } else if (c32 == '20\x00\x04'le) or  // ETC2  RGBA1.
             (c32 == '20\x00\x0B'le) {  //    ETC2 sRGBA1.
         this.pixfmt = base.PIXEL_FORMAT__BGRA_BINARY
-    } else if (c32 & 0xFFFF) == '20'le {
-        return "#unsupported ETC2 file"
+    } else if (c32 == '20\x00\x05'le) {  //  ETC2  R11U.
+        this.pixfmt = base.PIXEL_FORMAT__Y_16LE
+        choose from_colors_to_buffer = [from_colors_to_buffer_r11u]
+    } else if (c32 == '20\x00\x06'le) {  //  ETC2  RG11U.
+        this.pixfmt = base.PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE
+        choose from_colors_to_buffer = [from_colors_to_buffer_rg11u]
+    } else if (c32 == '20\x00\x07'le) {  //  ETC2  R11S.
+        this.pixfmt = base.PIXEL_FORMAT__Y_16LE
+        choose from_colors_to_buffer = [from_colors_to_buffer_r11s]
+    } else if (c32 == '20\x00\x08'le) {  //  ETC2  RG11S.
+        this.pixfmt = base.PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE
+        choose from_colors_to_buffer = [from_colors_to_buffer_rg11s]
     } else {
         return "#bad header"
     }
     this.srgb = (c32 >> 24) >= 0x09
 
+    if this.pixfmt == base.PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE {
+        i = 0
+        while i <= (4096 - 8) {
+            this.buffer[i + 0] = 0x00
+            this.buffer[i + 1] = 0x00
+            this.buffer[i + 6] = 0xFF
+            this.buffer[i + 7] = 0xFF
+            i += 8
+        }
+    }
+
     rounded_up_width = args.src.read_u16be?()
     rounded_up_height = args.src.read_u16be?()
 
@@ -214,6 +235,7 @@
 
 pri func decoder.do_decode_frame?(dst: ptr base.pixel_buffer, src: base.io_reader, blend: base.pixel_blend, workbuf: slice base.u8, opts: nptr base.decode_frame_options) {
     var remaining : base.u32[..= 0x1000_0000]
+    var max_nbb   : base.u32[..= 64]
     var status    : base.status
 
     if this.call_sequence == 0x40 {
@@ -239,7 +261,12 @@
 
     remaining = ((this.width + 3) / 4) * ((this.height + 3) / 4)
     while remaining > 0 {
-        this.num_buffered_blocks = remaining.min(no_more_than: 64)
+        max_nbb = 64
+        if ((this.pixfmt == base.PIXEL_FORMAT__Y_16LE)) or
+                (this.pixfmt == base.PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE) {
+            max_nbb = 32
+        }
+        this.num_buffered_blocks = remaining.min(no_more_than: max_nbb)
         if remaining < this.num_buffered_blocks {
             return base."#too much data"
         }
@@ -264,7 +291,8 @@
 
     while bi < this.num_buffered_blocks {
         assert bi < 64 via "a < b: a < c; c <= b"(c: this.num_buffered_blocks)
-        if this.pixfmt == base.PIXEL_FORMAT__BGRA_NONPREMUL {
+        if (this.pixfmt == base.PIXEL_FORMAT__BGRA_NONPREMUL) or
+                (this.pixfmt == base.PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE) {
             this.alphas[bi] = args.src.read_u64be?()
         }
         this.colors[bi] = args.src.read_u64be?()
@@ -272,7 +300,9 @@
     }
 }
 
-pri func decoder.from_colors_to_buffer!() {
+pri func decoder.from_colors_to_buffer!(),
+        choosy,
+{
     var bi    : base.u32[..= 64]
     var color : base.u64
     var r0    : base.u32
@@ -781,7 +811,172 @@
     }
 }
 
+pri func decoder.from_colors_to_buffer_r11u!() {  // Single-channel unsigned case: one pass over this.colors, 2 bytes per pixel.
+    this.from_colors_to_buffer_unsigned!(input: 1, dst_bytes_per_pixel: 2, offset_adjustment: 0)
+}
+
+pri func decoder.from_colors_to_buffer_r11s!() {  // Single-channel signed case: one pass over this.colors, 2 bytes per pixel.
+    this.from_colors_to_buffer_signed!(input: 1, dst_bytes_per_pixel: 2, offset_adjustment: 0)
+}
+
+pri func decoder.from_colors_to_buffer_rg11u!() {  // Two-channel unsigned case: two passes over 8-byte pixels.
+    this.from_colors_to_buffer_unsigned!(input: 0, dst_bytes_per_pixel: 8, offset_adjustment: 4)  // this.alphas blocks land at bytes 4..5 of each pixel.
+    this.from_colors_to_buffer_unsigned!(input: 1, dst_bytes_per_pixel: 8, offset_adjustment: 2)  // this.colors blocks land at bytes 2..3 of each pixel.
+}
+
+pri func decoder.from_colors_to_buffer_rg11s!() {  // Two-channel signed case: two passes over 8-byte pixels.
+    this.from_colors_to_buffer_signed!(input: 0, dst_bytes_per_pixel: 8, offset_adjustment: 4)  // this.alphas blocks land at bytes 4..5 of each pixel.
+    this.from_colors_to_buffer_signed!(input: 1, dst_bytes_per_pixel: 8, offset_adjustment: 2)  // this.colors blocks land at bytes 2..3 of each pixel.
+}
+
+pri func decoder.from_colors_to_buffer_unsigned!(input: base.u32[..= 1], dst_bytes_per_pixel: base.u32[..= 8], offset_adjustment: base.u32[..= 4]) {  // Expands each buffered 64-bit block into 4x4 16-bit little-endian samples in this.buffer; input 0 reads this.alphas, input 1 reads this.colors.
+    var nbb        : base.u32[..= 32]
+    var bi         : base.u32[..= 32]
+    var color      : base.u64
+    var c0         : base.u32
+    var multiplier : base.u32[..= 120]
+    var which      : base.u32[..= 15]
+    var offset     : base.u32[..= 996]
+    var y          : base.u32
+    var x          : base.u32
+    var shift      : base.u32[..= 45]
+    var delta      : base.u32
+    var c11        : base.u32
+    var c16        : base.u32
+    var o          : base.u32[..= 4092]
+
+    nbb = this.num_buffered_blocks.min(no_more_than: 32)  // Never process more than 32 blocks per call.
+    while bi < nbb {
+        assert bi < 32 via "a < b: a < c; c <= b"(c: nbb)
+        if args.input == 0 {
+            color = this.alphas[bi]
+        } else {
+            color = this.colors[bi]
+        }
+
+        c0 = (((color >> 56) as base.u32) * 8) + 4  // Base value: top byte of the block, times 8, plus 4.
+        multiplier = ((color >> 52) & 0x0F) as base.u32  // Bits 52..55.
+        multiplier *= 8
+        if multiplier == 0 {
+            multiplier = 1  // A zero multiplier field scales the modifier by 1 instead of 0.
+        }
+        which = ((color >> 48) & 0x0F) as base.u32  // Bits 48..51 select the ALPHA_MODIFIERS row.
+
+        offset = (4 * args.dst_bytes_per_pixel * bi) + args.offset_adjustment  // Each block is 4 pixels wide within a buffer row.
+        y = 0
+        while y < 4,
+                inv bi < 32,
+        {
+            x = 0
+            while x < 4,
+                    inv bi < 32,
+                    inv y < 4,
+            {
+                shift = (((x ^ 3) * 4) | (y ^ 3)) * 3  // 3-bit selector per pixel; (x=0, y=0) uses the highest shift, 45.
+                delta = multiplier ~mod* ALPHA_MODIFIERS[which][7 & (color >> shift)]  // Wrapping multiply.
+                c11 = c0 ~mod+ delta  // Wrapping add.
+                if c11 >= 0x8000_0000 {  // High bit set: treated as a negative result and clamped to 0.
+                    c11 = 0
+                    assert c11 <= 2047
+                } else if c11 > 2047 {  // Clamp to the 11-bit maximum.
+                    c11 = 2047
+                    assert c11 <= 2047
+                }
+                c16 = (c11 << 5) | (c11 >> 6)  // Widen 11 bits to 16 by repeating the top 5 bits in the low bits.
+                o = offset + (x * args.dst_bytes_per_pixel) + (y * 1024)  // Consecutive pixel rows are 1024 bytes apart in this.buffer.
+                this.buffer[o + 0] = ((c16 >> 0) & 0xFF) as base.u8  // Low byte first (little-endian).
+                this.buffer[o + 1] = ((c16 >> 8) & 0xFF) as base.u8
+                x += 1
+            }
+            y += 1
+        }
+
+        bi += 1
+    }
+}
+
+pri func decoder.from_colors_to_buffer_signed!(input: base.u32[..= 1], dst_bytes_per_pixel: base.u32[..= 8], offset_adjustment: base.u32[..= 4]) {  // Signed-base variant: expands each buffered 64-bit block into 4x4 16-bit little-endian samples in this.buffer; input 0 reads this.alphas, input 1 reads this.colors.
+    var nbb        : base.u32[..= 32]
+    var bi         : base.u32[..= 32]
+    var color      : base.u64
+    var c0         : base.u32
+    var multiplier : base.u32[..= 120]
+    var which      : base.u32[..= 15]
+    var offset     : base.u32[..= 996]
+    var y          : base.u32
+    var x          : base.u32
+    var shift      : base.u32[..= 45]
+    var delta      : base.u32
+    var c11        : base.u32
+    var c16        : base.u32
+    var o          : base.u32[..= 4092]
+
+    nbb = this.num_buffered_blocks.min(no_more_than: 32)  // Never process more than 32 blocks per call.
+    while bi < nbb {
+        assert bi < 32 via "a < b: a < c; c <= b"(c: nbb)
+        if args.input == 0 {
+            color = this.alphas[bi]
+        } else {
+            color = this.colors[bi]
+        }
+
+        c0 = ((color >> 56) as base.u32) * 8  // Top byte of the block, times 8; reinterpreted as signed below.
+        if c0 < (0x80 * 8) {
+            // No-op: top byte 0x00..0x7F is non-negative and kept as is.
+        } else if c0 == (0x80 * 8) {
+            c0 = 0xFFFF_FC08  // Top byte 0x80 becomes -1016 (-127 * 8) in 32-bit two's complement.
+        } else {
+            c0 ~mod-= (0x100 * 8)  // Top byte 0x81..0xFF: wraps to a negative two's complement value.
+        }
+        multiplier = ((color >> 52) & 0x0F) as base.u32  // Bits 52..55.
+        multiplier *= 8
+        if multiplier == 0 {
+            multiplier = 1  // A zero multiplier field scales the modifier by 1 instead of 0.
+        }
+        which = ((color >> 48) & 0x0F) as base.u32  // Bits 48..51 select the ALPHA_MODIFIERS row.
+
+        offset = (4 * args.dst_bytes_per_pixel * bi) + args.offset_adjustment  // Each block is 4 pixels wide within a buffer row.
+        y = 0
+        while y < 4,
+                inv bi < 32,
+        {
+            x = 0
+            while x < 4,
+                    inv bi < 32,
+                    inv y < 4,
+            {
+                shift = (((x ^ 3) * 4) | (y ^ 3)) * 3  // 3-bit selector per pixel; (x=0, y=0) uses the highest shift, 45.
+                delta = multiplier ~mod* ALPHA_MODIFIERS[which][7 & (color >> shift)]  // Wrapping multiply.
+                c11 = c0 ~mod+ delta  // Wrapping add.
+                if c11 <= 0x0000_03FF {  //       0x0000_03FF is +1023.
+                    // No-op: already within 0 ..= +1023.
+                } else if c11 < 0x8000_0000 {  // Positive and too large: clamp to +1023.
+                    c11 = 0x0000_03FF
+                } else if c11 < 0xFFFF_FC01 {  // 0xFFFF_FC01 is -1023.
+                    c11 = 0xFFFF_FC01
+                }
+                if c11 < 0x8000_0000 {  // Non-negative: widen the magnitude directly.
+                    c16 = (c11 ~mod<< 5) | (c11 >> 5)
+                } else {  // Negative: negate, widen the magnitude, then negate the result.
+                    c11 = 0 ~mod- c11
+                    c16 = (c11 ~mod<< 5) | (c11 >> 5)
+                    c16 = 0 ~mod- c16
+                }
+                c16 ^= 0x8000  // Flip bit 15 before storing the low 16 bits.
+                o = offset + (x * args.dst_bytes_per_pixel) + (y * 1024)  // Consecutive pixel rows are 1024 bytes apart in this.buffer.
+                this.buffer[o + 0] = ((c16 >> 0) & 0xFF) as base.u8  // Low byte first (little-endian).
+                this.buffer[o + 1] = ((c16 >> 8) & 0xFF) as base.u8
+                x += 1
+            }
+            y += 1
+        }
+
+        bi += 1
+    }
+}
+
 pri func decoder.from_buffer_to_dst!(dst: ptr base.pixel_buffer) base.status {
+    var src_bytes_per_pixel : base.u32[..= 8]
     var dst_pixfmt          : base.pixel_format
     var dst_bits_per_pixel  : base.u32[..= 256]
     var dst_bytes_per_pixel : base.u32[..= 32]
@@ -797,6 +992,17 @@
     var i                   : base.u64
     var num_src_pixels      : base.u32
 
+    if this.pixfmt == base.PIXEL_FORMAT__Y_16LE {
+        src_bytes_per_pixel = 2
+        assert src_bytes_per_pixel > 0
+    } else if this.pixfmt == base.PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE {
+        src_bytes_per_pixel = 8
+        assert src_bytes_per_pixel > 0
+    } else {
+        src_bytes_per_pixel = 4
+        assert src_bytes_per_pixel > 0
+    }
+
     // TODO: the dst_pixfmt variable shouldn't be necessary. We should be able
     // to chain the two calls: "args.dst.pixel_format().bits_per_pixel()".
     dst_pixfmt = args.dst.pixel_format()
@@ -808,7 +1014,9 @@
     dst_bytes_per_row = (this.width * dst_bytes_per_pixel) as base.u64
     tab = args.dst.plane(p: 0)
 
-    while bi < this.num_buffered_blocks {
+    while bi < this.num_buffered_blocks,
+            inv src_bytes_per_pixel > 0,
+    {
         assert bi < 64 via "a < b: a < c; c <= b"(c: this.num_buffered_blocks)
 
         if this.width <= this.dst_x {
@@ -824,15 +1032,24 @@
 
         dy = 0
         while dy < 4,
+                inv src_bytes_per_pixel > 0,
                 inv bi < 64,
         {
-            si = (1024 * dy) + (16 * bi)
-            sj = (1024 * dy) + (16 * 64)
+            if src_bytes_per_pixel == 4 {
+                si = (1024 * dy) + (16 * bi)
+                sj = (1024 * dy) + (16 * 64)  // 64 is max_nbb.
+            } else if src_bytes_per_pixel < 4 {
+                si = (1024 * dy) + (8 * bi)
+                sj = (1024 * dy) + (8 * 32)  //  32 is max_nbb.
+            } else {
+                si = (1024 * dy) + (32 * (bi & 31))
+                sj = (1024 * dy) + (32 * 32)  // 32 is max_nbb.
+            }
             if si < sj {
                 src = this.buffer[si .. sj]
             }
-            if ((4 * rem_x) as base.u64) < src.length() {
-                src = src[.. (4 * rem_x) as base.u64]
+            if ((src_bytes_per_pixel * rem_x) as base.u64) < src.length() {
+                src = src[.. (src_bytes_per_pixel * rem_x) as base.u64]
             }
 
             if (this.dst_y ~mod+ dy) >= this.height {
@@ -854,9 +1071,9 @@
             dy += 1
         }
 
-        num_src_pixels = ((src.length() & 0xFFFF_FFFF) as base.u32) / 4
+        num_src_pixels = ((src.length() & 0xFFFF_FFFF) as base.u32) / src_bytes_per_pixel
         this.dst_x ~mod+= num_src_pixels
-        bi ~mod+= (num_src_pixels + 3) / 4
+        bi ~mod+= (num_src_pixels ~mod+ 3) / 4
     }
 
     return ok