std/etc2: support ETC2PACKAGE_RGB_NO_MIPMAPS (and its sRGB variant)
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 1397c91..c819c72 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -9118,6 +9118,7 @@
     uint32_t f_height;
     uint32_t f_remaining_blocks;
     uint8_t f_call_sequence;
+    bool f_srgb;
     uint32_t f_buffer_index;
     uint32_t f_dst_x;
     uint32_t f_dst_y;
@@ -41022,6 +41023,11 @@
 };
 
 static const uint8_t
+WUFFS_ETC2__T_H_MODIFIERS[8] WUFFS_BASE__POTENTIALLY_UNUSED = {
+  3u, 6u, 11u, 16u, 23u, 32u, 41u, 64u,  // Per-channel deltas looked up by decode_t_mode and decode_h_mode.
+};
+
+static const uint8_t
 WUFFS_ETC2__CLAMP[1024] WUFFS_BASE__POTENTIALLY_UNUSED = {
   0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u,
   8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u,
@@ -41189,6 +41195,27 @@
 
 WUFFS_BASE__GENERATED_C_CODE
 static wuffs_base__empty_struct
+wuffs_etc2__decoder__decode_t_mode(
+    wuffs_etc2__decoder* self,
+    uint64_t a_bits,
+    uint32_t a_offset);
+
+WUFFS_BASE__GENERATED_C_CODE
+static wuffs_base__empty_struct
+wuffs_etc2__decoder__decode_h_mode(
+    wuffs_etc2__decoder* self,
+    uint64_t a_bits,
+    uint32_t a_offset);
+
+WUFFS_BASE__GENERATED_C_CODE
+static wuffs_base__empty_struct
+wuffs_etc2__decoder__decode_planar_mode(
+    wuffs_etc2__decoder* self,
+    uint64_t a_bits,
+    uint32_t a_offset);
+
+WUFFS_BASE__GENERATED_C_CODE
+static wuffs_base__empty_struct
 wuffs_etc2__decoder__decode_half_block(
     wuffs_etc2__decoder* self,
     uint32_t a_bits,
@@ -41519,7 +41546,7 @@
       }
       v_c32 = t_1;
     }
-    if (v_c32 == 12337u) {
+    if ((v_c32 == 12337u) || (v_c32 == 16789554u) || (v_c32 == 151007282u)) {
       self->private_impl.f_pixfmt = 2415954056u;
     } else if ((v_c32 & 65535u) == 12338u) {
       status = wuffs_base__make_status(wuffs_etc2__error__unsupported_etc2_file);
@@ -41528,6 +41555,7 @@
       status = wuffs_base__make_status(wuffs_etc2__error__bad_header);
       goto exit;
     }
+    self->private_impl.f_srgb = ((v_c32 >> 24u) >= 9u);
     {
       WUFFS_BASE__COROUTINE_SUSPENSION_POINT(5);
       uint16_t t_2;
@@ -42088,15 +42116,30 @@
         v_b1 = ((v_b1 << 4u) | v_b1);
       } else {
         v_r0 = ((uint32_t)((31u & (v_c64 >> 59u))));
-        v_r1 = (31u & ((uint32_t)(v_r0 + WUFFS_ETC2__DIFFS[(7u & (v_c64 >> 56u))])));
+        v_r1 = ((uint32_t)(v_r0 + WUFFS_ETC2__DIFFS[(7u & (v_c64 >> 56u))]));
+        if ((v_r1 >> 5u) != 0u) {
+          wuffs_etc2__decoder__decode_t_mode(self, v_c64, (16u * v_bi));
+          v_bi += 1u;
+          continue;
+        }
         v_r0 = (((uint32_t)(v_r0 << 3u)) | (v_r0 >> 2u));
         v_r1 = (((uint32_t)(v_r1 << 3u)) | (v_r1 >> 2u));
         v_g0 = ((uint32_t)((31u & (v_c64 >> 51u))));
-        v_g1 = (31u & ((uint32_t)(v_g0 + WUFFS_ETC2__DIFFS[(7u & (v_c64 >> 48u))])));
+        v_g1 = ((uint32_t)(v_g0 + WUFFS_ETC2__DIFFS[(7u & (v_c64 >> 48u))]));
+        if ((v_g1 >> 5u) != 0u) {
+          wuffs_etc2__decoder__decode_h_mode(self, v_c64, (16u * v_bi));
+          v_bi += 1u;
+          continue;
+        }
         v_g0 = (((uint32_t)(v_g0 << 3u)) | (v_g0 >> 2u));
         v_g1 = (((uint32_t)(v_g1 << 3u)) | (v_g1 >> 2u));
         v_b0 = ((uint32_t)((31u & (v_c64 >> 43u))));
-        v_b1 = (31u & ((uint32_t)(v_b0 + WUFFS_ETC2__DIFFS[(7u & (v_c64 >> 40u))])));
+        v_b1 = ((uint32_t)(v_b0 + WUFFS_ETC2__DIFFS[(7u & (v_c64 >> 40u))]));
+        if ((v_b1 >> 5u) != 0u) {
+          wuffs_etc2__decoder__decode_planar_mode(self, v_c64, (16u * v_bi));
+          v_bi += 1u;
+          continue;
+        }
         v_b0 = (((uint32_t)(v_b0 << 3u)) | (v_b0 >> 2u));
         v_b1 = (((uint32_t)(v_b1 << 3u)) | (v_b1 >> 2u));
       }
@@ -42144,6 +42187,197 @@
   return status;
 }
 
+// -------- func etc2.decoder.decode_t_mode
+
+WUFFS_BASE__GENERATED_C_CODE
+static wuffs_base__empty_struct
+wuffs_etc2__decoder__decode_t_mode(
+    wuffs_etc2__decoder* self,
+    uint64_t a_bits,
+    uint32_t a_offset) {
+  uint8_t v_r[4] = {0};
+  uint8_t v_g[4] = {0};
+  uint8_t v_b[4] = {0};
+  uint32_t v_which = 0;
+  uint32_t v_delta = 0;
+  uint32_t v_y = 0;
+  uint32_t v_x = 0;
+  uint32_t v_x4y = 0;
+  uint32_t v_i = 0;
+  uint32_t v_o = 0;
+  // T-mode: unpack two 4-bit-per-channel colors from a_bits into slots 0 and 2, derive slots 1 and 3, then paint 4x4 pixels.
+  v_r[0u] = ((uint8_t)(((uint8_t)((12u & (a_bits >> 57u)))) | ((uint8_t)((3u & (a_bits >> 56u))))));
+  v_r[0u] = ((uint8_t)(((uint8_t)(v_r[0u] << 4u)) | v_r[0u]));  // Replicate the nibble: 0xN becomes 0xNN.
+  v_g[0u] = ((uint8_t)((15u & (a_bits >> 52u))));
+  v_g[0u] = ((uint8_t)(((uint8_t)(v_g[0u] << 4u)) | v_g[0u]));
+  v_b[0u] = ((uint8_t)((15u & (a_bits >> 48u))));
+  v_b[0u] = ((uint8_t)(((uint8_t)(v_b[0u] << 4u)) | v_b[0u]));
+  v_r[2u] = ((uint8_t)((15u & (a_bits >> 44u))));
+  v_r[2u] = ((uint8_t)(((uint8_t)(v_r[2u] << 4u)) | v_r[2u]));
+  v_g[2u] = ((uint8_t)((15u & (a_bits >> 40u))));
+  v_g[2u] = ((uint8_t)(((uint8_t)(v_g[2u] << 4u)) | v_g[2u]));
+  v_b[2u] = ((uint8_t)((15u & (a_bits >> 36u))));
+  v_b[2u] = ((uint8_t)(((uint8_t)(v_b[2u] << 4u)) | v_b[2u]));
+  v_which = (((uint32_t)((6u & (a_bits >> 33u)))) | ((uint32_t)((1u & (a_bits >> 32u)))));  // Bits 35, 34 and 32; bit 33 is skipped.
+  v_delta = ((uint32_t)(WUFFS_ETC2__T_H_MODIFIERS[v_which]));
+  v_r[1u] = WUFFS_ETC2__CLAMP[(((uint32_t)(((uint32_t)(v_r[2u])) + v_delta)) & 1023u)];  // Slot 1 = slot 2 + delta; "& 1023u" bounds the index to the 1024-entry table.
+  v_g[1u] = WUFFS_ETC2__CLAMP[(((uint32_t)(((uint32_t)(v_g[2u])) + v_delta)) & 1023u)];
+  v_b[1u] = WUFFS_ETC2__CLAMP[(((uint32_t)(((uint32_t)(v_b[2u])) + v_delta)) & 1023u)];
+  v_r[3u] = WUFFS_ETC2__CLAMP[(((uint32_t)(((uint32_t)(v_r[2u])) - v_delta)) & 1023u)];  // Slot 3 = slot 2 - delta; the unsigned subtraction may wrap before masking.
+  v_g[3u] = WUFFS_ETC2__CLAMP[(((uint32_t)(((uint32_t)(v_g[2u])) - v_delta)) & 1023u)];
+  v_b[3u] = WUFFS_ETC2__CLAMP[(((uint32_t)(((uint32_t)(v_b[2u])) - v_delta)) & 1023u)];
+  while (v_y < 4u) {
+    v_x = 0u;
+    while (v_x < 4u) {
+      v_x4y = ((v_x * 4u) | v_y);
+      v_i = (((uint32_t)(((a_bits >> v_x4y) & 1u))) | ((uint32_t)(((a_bits >> (v_x4y + 15u)) & 2u))));  // Low index bit is bit v_x4y; high index bit is bit (v_x4y + 16).
+      v_o = (a_offset + (v_x * 4u) + (v_y * 1024u));  // 4 bytes per step in x, 1024 bytes per step in y.
+      self->private_data.f_buffer[(v_o + 0u)] = v_b[v_i];
+      self->private_data.f_buffer[(v_o + 1u)] = v_g[v_i];
+      self->private_data.f_buffer[(v_o + 2u)] = v_r[v_i];  // Byte (v_o + 3u) is not written here.
+      v_x += 1u;
+    }
+    v_y += 1u;
+  }
+  return wuffs_base__make_empty_struct();
+}
+
+// -------- func etc2.decoder.decode_h_mode
+
+WUFFS_BASE__GENERATED_C_CODE
+static wuffs_base__empty_struct
+wuffs_etc2__decoder__decode_h_mode(
+    wuffs_etc2__decoder* self,
+    uint64_t a_bits,
+    uint32_t a_offset) {
+  uint8_t v_r[4] = {0};
+  uint8_t v_g[4] = {0};
+  uint8_t v_b[4] = {0};
+  uint32_t v_rgb0 = 0;
+  uint32_t v_rgb2 = 0;
+  uint32_t v_which = 0;
+  uint32_t v_delta = 0;
+  uint32_t v_y = 0;
+  uint32_t v_x = 0;
+  uint32_t v_x4y = 0;
+  uint32_t v_i = 0;
+  uint32_t v_o = 0;
+  // H-mode: unpack two 4-bit-per-channel colors from a_bits into slots 0 and 2, expand each to (color + delta, color - delta), then paint 4x4 pixels.
+  v_r[0u] = ((uint8_t)((15u & (a_bits >> 59u))));
+  v_r[0u] = ((uint8_t)(((uint8_t)(v_r[0u] << 4u)) | v_r[0u]));  // Replicate the nibble: 0xN becomes 0xNN.
+  v_g[0u] = ((uint8_t)(((uint8_t)((14u & (a_bits >> 55u)))) | ((uint8_t)((1u & (a_bits >> 52u))))));
+  v_g[0u] = ((uint8_t)(((uint8_t)(v_g[0u] << 4u)) | v_g[0u]));
+  v_b[0u] = ((uint8_t)(((uint8_t)((8u & (a_bits >> 48u)))) | ((uint8_t)((7u & (a_bits >> 47u))))));
+  v_b[0u] = ((uint8_t)(((uint8_t)(v_b[0u] << 4u)) | v_b[0u]));
+  v_r[2u] = ((uint8_t)((15u & (a_bits >> 43u))));
+  v_r[2u] = ((uint8_t)(((uint8_t)(v_r[2u] << 4u)) | v_r[2u]));
+  v_g[2u] = ((uint8_t)((15u & (a_bits >> 39u))));
+  v_g[2u] = ((uint8_t)(((uint8_t)(v_g[2u] << 4u)) | v_g[2u]));
+  v_b[2u] = ((uint8_t)((15u & (a_bits >> 35u))));
+  v_b[2u] = ((uint8_t)(((uint8_t)(v_b[2u] << 4u)) | v_b[2u]));
+  v_rgb0 = ((((uint32_t)(v_r[0u])) << 16u) | (((uint32_t)(v_g[0u])) << 8u) | (((uint32_t)(v_b[0u])) << 0u));  // 0xRRGGBB of slot 0, used only for the ordering test below.
+  v_rgb2 = ((((uint32_t)(v_r[2u])) << 16u) | (((uint32_t)(v_g[2u])) << 8u) | (((uint32_t)(v_b[2u])) << 0u));
+  v_which = (((uint32_t)((4u & (a_bits >> 32u)))) | ((uint32_t)((2u & (a_bits >> 31u)))));  // Bit 34 and bit 32 give the upper two bits of the table index.
+  if (v_rgb0 >= v_rgb2) {
+    v_which |= 1u;  // The lowest index bit is not stored; it comes from the ordering of the two colors.
+  }
+  v_delta = ((uint32_t)(WUFFS_ETC2__T_H_MODIFIERS[v_which]));
+  v_r[1u] = WUFFS_ETC2__CLAMP[(((uint32_t)(((uint32_t)(v_r[0u])) - v_delta)) & 1023u)];  // Slot 1 is computed first because slot 0 is overwritten in place just below.
+  v_g[1u] = WUFFS_ETC2__CLAMP[(((uint32_t)(((uint32_t)(v_g[0u])) - v_delta)) & 1023u)];
+  v_b[1u] = WUFFS_ETC2__CLAMP[(((uint32_t)(((uint32_t)(v_b[0u])) - v_delta)) & 1023u)];
+  v_r[0u] = WUFFS_ETC2__CLAMP[(((uint32_t)(((uint32_t)(v_r[0u])) + v_delta)) & 1023u)];
+  v_g[0u] = WUFFS_ETC2__CLAMP[(((uint32_t)(((uint32_t)(v_g[0u])) + v_delta)) & 1023u)];
+  v_b[0u] = WUFFS_ETC2__CLAMP[(((uint32_t)(((uint32_t)(v_b[0u])) + v_delta)) & 1023u)];
+  v_r[3u] = WUFFS_ETC2__CLAMP[(((uint32_t)(((uint32_t)(v_r[2u])) - v_delta)) & 1023u)];  // Likewise slot 3 before slot 2 is overwritten.
+  v_g[3u] = WUFFS_ETC2__CLAMP[(((uint32_t)(((uint32_t)(v_g[2u])) - v_delta)) & 1023u)];
+  v_b[3u] = WUFFS_ETC2__CLAMP[(((uint32_t)(((uint32_t)(v_b[2u])) - v_delta)) & 1023u)];
+  v_r[2u] = WUFFS_ETC2__CLAMP[(((uint32_t)(((uint32_t)(v_r[2u])) + v_delta)) & 1023u)];
+  v_g[2u] = WUFFS_ETC2__CLAMP[(((uint32_t)(((uint32_t)(v_g[2u])) + v_delta)) & 1023u)];
+  v_b[2u] = WUFFS_ETC2__CLAMP[(((uint32_t)(((uint32_t)(v_b[2u])) + v_delta)) & 1023u)];
+  while (v_y < 4u) {
+    v_x = 0u;
+    while (v_x < 4u) {
+      v_x4y = ((v_x * 4u) | v_y);
+      v_i = (((uint32_t)(((a_bits >> v_x4y) & 1u))) | ((uint32_t)(((a_bits >> (v_x4y + 15u)) & 2u))));  // Low index bit is bit v_x4y; high index bit is bit (v_x4y + 16).
+      v_o = (a_offset + (v_x * 4u) + (v_y * 1024u));  // 4 bytes per step in x, 1024 bytes per step in y.
+      self->private_data.f_buffer[(v_o + 0u)] = v_b[v_i];
+      self->private_data.f_buffer[(v_o + 1u)] = v_g[v_i];
+      self->private_data.f_buffer[(v_o + 2u)] = v_r[v_i];  // Byte (v_o + 3u) is not written here.
+      v_x += 1u;
+    }
+    v_y += 1u;
+  }
+  return wuffs_base__make_empty_struct();
+}
+
+// -------- func etc2.decoder.decode_planar_mode
+
+WUFFS_BASE__GENERATED_C_CODE
+static wuffs_base__empty_struct
+wuffs_etc2__decoder__decode_planar_mode(
+    wuffs_etc2__decoder* self,
+    uint64_t a_bits,
+    uint32_t a_offset) {
+  uint32_t v_ro = 0;
+  uint32_t v_go = 0;
+  uint32_t v_bo = 0;
+  uint32_t v_rh = 0;
+  uint32_t v_gh = 0;
+  uint32_t v_bh = 0;
+  uint32_t v_rv = 0;
+  uint32_t v_gv = 0;
+  uint32_t v_bv = 0;
+  uint32_t v_y = 0;
+  uint32_t v_x = 0;
+  uint32_t v_o = 0;
+  uint32_t v_rp = 0;
+  uint32_t v_gp = 0;
+  uint32_t v_bp = 0;
+  // Planar mode: unpack three colors (suffixes o, h, v) from a_bits, then compute every pixel as a linear blend of them.
+  v_ro = ((uint32_t)((63u & (a_bits >> 57u))));
+  v_ro = (((uint32_t)(v_ro << 2u)) | (v_ro >> 4u));  // 6 bits to 8 bits by repeating the top 2 bits.
+  v_go = (((uint32_t)((64u & (a_bits >> 50u)))) | ((uint32_t)((63u & (a_bits >> 49u)))));
+  v_go = (((uint32_t)(v_go << 1u)) | (v_go >> 6u));  // 7 bits to 8 bits by repeating the top bit.
+  v_bo = (((uint32_t)((32u & (a_bits >> 43u)))) | ((uint32_t)((24u & (a_bits >> 40u)))) | ((uint32_t)((7u & (a_bits >> 39u)))));
+  v_bo = (((uint32_t)(v_bo << 2u)) | (v_bo >> 4u));
+  v_rh = (((uint32_t)((62u & (a_bits >> 33u)))) | ((uint32_t)((1u & (a_bits >> 32u)))));
+  v_rh = (((uint32_t)(v_rh << 2u)) | (v_rh >> 4u));
+  v_gh = ((uint32_t)((127u & (a_bits >> 25u))));
+  v_gh = (((uint32_t)(v_gh << 1u)) | (v_gh >> 6u));
+  v_bh = ((uint32_t)((63u & (a_bits >> 19u))));
+  v_bh = (((uint32_t)(v_bh << 2u)) | (v_bh >> 4u));
+  v_rv = ((uint32_t)((63u & (a_bits >> 13u))));
+  v_rv = (((uint32_t)(v_rv << 2u)) | (v_rv >> 4u));
+  v_gv = ((uint32_t)((127u & (a_bits >> 6u))));
+  v_gv = (((uint32_t)(v_gv << 1u)) | (v_gv >> 6u));
+  v_bv = ((uint32_t)((63u & (a_bits >> 0u))));
+  v_bv = (((uint32_t)(v_bv << 2u)) | (v_bv >> 4u));
+  v_rh -= v_ro;  // h and v become differences from o; a negative difference wraps as an unsigned value.
+  v_gh -= v_go;
+  v_bh -= v_bo;
+  v_rv -= v_ro;
+  v_gv -= v_go;
+  v_bv -= v_bo;
+  v_ro *= 4u;  // Pre-scale o by 4 because the per-pixel sum below is divided by 4.
+  v_go *= 4u;
+  v_bo *= 4u;
+  while (v_y < 4u) {
+    v_x = 0u;
+    while (v_x < 4u) {
+      v_o = (a_offset + (v_x * 4u) + (v_y * 1024u));  // 4 bytes per step in x, 1024 bytes per step in y.
+      v_bp = ((uint32_t)(((uint32_t)(((uint32_t)(v_x * v_bh)) + ((uint32_t)(v_y * v_bv)))) + v_bo));  // x * h + y * v + 4 * o, in wrapping uint32_t arithmetic.
+      self->private_data.f_buffer[(v_o + 0u)] = WUFFS_ETC2__CLAMP[((((uint32_t)(v_bp + 2u)) / 4u) & 1023u)];  // "+ 2u" is half the divisor; "& 1023u" bounds the table index.
+      v_gp = ((uint32_t)(((uint32_t)(((uint32_t)(v_x * v_gh)) + ((uint32_t)(v_y * v_gv)))) + v_go));
+      self->private_data.f_buffer[(v_o + 1u)] = WUFFS_ETC2__CLAMP[((((uint32_t)(v_gp + 2u)) / 4u) & 1023u)];
+      v_rp = ((uint32_t)(((uint32_t)(((uint32_t)(v_x * v_rh)) + ((uint32_t)(v_y * v_rv)))) + v_ro));
+      self->private_data.f_buffer[(v_o + 2u)] = WUFFS_ETC2__CLAMP[((((uint32_t)(v_rp + 2u)) / 4u) & 1023u)];  // Byte (v_o + 3u) is not written here.
+      v_x += 1u;
+    }
+    v_y += 1u;
+  }
+  return wuffs_base__make_empty_struct();
+}
+
 // -------- func etc2.decoder.decode_half_block
 
 WUFFS_BASE__GENERATED_C_CODE
diff --git a/std/etc2/decode_etc2.wuffs b/std/etc2/decode_etc2.wuffs
index 89b088c..b8cbbb1 100644
--- a/std/etc2/decode_etc2.wuffs
+++ b/std/etc2/decode_etc2.wuffs
@@ -47,6 +47,8 @@
         // (/doc/std/image-decoders-call-sequence.md).
         call_sequence : base.u8,
 
+        srgb : base.bool,
+
         // buffer_index is measured in blocks.
         buffer_index : base.u32[..= 64],
 
@@ -95,13 +97,16 @@
     }
 
     c32 = args.src.read_u32le?()
-    if c32 == '10\x00\x00'le {
+    if (c32 == '10\x00\x00'le) or  //      ETC1.
+            (c32 == '20\x00\x01'le) or  // ETC2  RGB.
+            (c32 == '20\x00\x09'le) {  //  ETC2 sRGB.
         this.pixfmt = base.PIXEL_FORMAT__BGRX
     } else if (c32 & 0xFFFF) == '20'le {
         return "#unsupported ETC2 file"
     } else {
         return "#bad header"
     }
+    this.srgb = (c32 >> 24) >= 0x09
 
     rounded_up_width = args.src.read_u16be?()
     rounded_up_height = args.src.read_u16be?()
@@ -248,6 +253,12 @@
         c64 = args.src.read_u64be?()
 
         if (c64 & 0x2_0000_0000) == 0 {
+            // The high 32 bits of c64:
+            //
+            // 63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32  dec
+            // 3F 3E 3D 3C 3B 3A 39 38 37 36 35 34 33 32 31 30 2F 2E 2D 2C 2B 2A 29 28 27 26 25 24 23 22 21 20  hex
+            // R0 r0 r0 r0 R1 r1 r1 r1 G0 g0 g0 g0 G1 g1 g1 g1 B0 b0 b0 b0 B1 b1 b1 b1 W0 w0 w0 W1 w1 w1 00 Fl
+
             r0 = (0x0F & (c64 >> 0x3C)) as base.u32
             r0 = (r0 << 4) | r0
             r1 = (0x0F & (c64 >> 0x38)) as base.u32
@@ -264,18 +275,45 @@
             b1 = (b1 << 4) | b1
 
         } else {
+            // The high 32 bits of c64 (unless ETC2's T / H / Planar modes):
+            //
+            // 63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32  dec
+            // 3F 3E 3D 3C 3B 3A 39 38 37 36 35 34 33 32 31 30 2F 2E 2D 2C 2B 2A 29 28 27 26 25 24 23 22 21 20  hex
+            // R0 r0 r0 r0 r0 R1 r1 r1 G0 g0 g0 g0 g0 G1 g1 g1 B0 b0 b0 b0 b0 B1 b1 b1 W0 w0 w0 W1 w1 w1 01 Fl
+
             r0 = (0x1F & (c64 >> 0x3B)) as base.u32
-            r1 = (0x1F & (r0 ~mod+ DIFFS[0x07 & (c64 >> 0x38)]))
+            r1 = r0 ~mod+ DIFFS[0x07 & (c64 >> 0x38)]
+            if (r1 >> 5) <> 0 {
+                this.decode_t_mode!(
+                        bits: c64,
+                        offset: 16 * bi)
+                bi += 1
+                continue
+            }
             r0 = (r0 ~mod<< 3) | (r0 >> 2)
             r1 = (r1 ~mod<< 3) | (r1 >> 2)
 
             g0 = (0x1F & (c64 >> 0x33)) as base.u32
-            g1 = (0x1F & (g0 ~mod+ DIFFS[0x07 & (c64 >> 0x30)]))
+            g1 = g0 ~mod+ DIFFS[0x07 & (c64 >> 0x30)]
+            if (g1 >> 5) <> 0 {
+                this.decode_h_mode!(
+                        bits: c64,
+                        offset: 16 * bi)
+                bi += 1
+                continue
+            }
             g0 = (g0 ~mod<< 3) | (g0 >> 2)
             g1 = (g1 ~mod<< 3) | (g1 >> 2)
 
             b0 = (0x1F & (c64 >> 0x2B)) as base.u32
-            b1 = (0x1F & (b0 ~mod+ DIFFS[0x07 & (c64 >> 0x28)]))
+            b1 = b0 ~mod+ DIFFS[0x07 & (c64 >> 0x28)]
+            if (b1 >> 5) <> 0 {
+                this.decode_planar_mode!(
+                        bits: c64,
+                        offset: 16 * bi)
+                bi += 1
+                continue
+            }
             b0 = (b0 ~mod<< 3) | (b0 >> 2)
             b1 = (b1 ~mod<< 3) | (b1 >> 2)
         }
@@ -306,6 +344,264 @@
     this.buffer_index = bi
 }
 
+pri func decoder.decode_t_mode!(
+        bits: base.u64,
+        offset: base.u32[..= 1008]) {  // Decodes one 4×4 T-mode block (args.bits) into this.buffer at byte args.offset.
+    var r : array[4] base.u8  // r[i], g[i] and b[i] together form paint color i.
+    var g : array[4] base.u8
+    var b : array[4] base.u8
+
+    var which : base.u32[..= 7]  // Index into T_H_MODIFIERS.
+    var delta : base.u32
+
+    var y   : base.u32
+    var x   : base.u32
+    var x4y : base.u32[..= 15]  // Bit position of pixel (x, y)'s low index bit.
+    var i   : base.u32[..= 3]  // Which paint color the pixel uses.
+    var o   : base.u32[..= 4092]  // Position in this.buffer of the pixel's first byte.
+
+    // Unjumble T-mode's 59 bits and convert from RGB444 to RGB888.
+    //
+    // 63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32  dec
+    // 3F 3E 3D 3C 3B 3A 39 38 37 36 35 34 33 32 31 30 2F 2E 2D 2C 2B 2A 29 28 27 26 25 24 23 22 21 20  hex
+    // .. .. .. R0 r0 .. r0 r0 G0 g0 g0 g0 B0 b0 b0 b0 R2 r2 r2 r2 G2 g2 g2 g2 B2 b2 b2 b2 Wh wh .. wh
+    //
+    // The low 32 bits hold "2 bits per pixel" indexes into the r, g, b arrays.
+
+    r[0] = ((0x0C & (args.bits >> 0x39)) as base.u8) |
+            ((0x03 & (args.bits >> 0x38)) as base.u8)
+    r[0] = (r[0] << 4) | r[0]  // Replicate the nibble: 0xN becomes 0xNN.
+    g[0] = (0x0F & (args.bits >> 0x34)) as base.u8
+    g[0] = (g[0] << 4) | g[0]
+    b[0] = (0x0F & (args.bits >> 0x30)) as base.u8
+    b[0] = (b[0] << 4) | b[0]
+
+    r[2] = (0x0F & (args.bits >> 0x2C)) as base.u8
+    r[2] = (r[2] << 4) | r[2]
+    g[2] = (0x0F & (args.bits >> 0x28)) as base.u8
+    g[2] = (g[2] << 4) | g[2]
+    b[2] = (0x0F & (args.bits >> 0x24)) as base.u8
+    b[2] = (b[2] << 4) | b[2]
+
+    // Modulate: paint colors 1 and 3 are paint color 2 plus and minus delta.
+
+    which = ((0x06 & (args.bits >> 0x21)) as base.u32) |
+            ((0x01 & (args.bits >> 0x20)) as base.u32)
+    delta = T_H_MODIFIERS[which] as base.u32
+    // The "& 1023" bounds each wrapping (~mod) result to CLAMP's 1024 entries.
+    r[1] = CLAMP[((r[2] as base.u32) ~mod+ delta) & 1023]
+    g[1] = CLAMP[((g[2] as base.u32) ~mod+ delta) & 1023]
+    b[1] = CLAMP[((b[2] as base.u32) ~mod+ delta) & 1023]
+    // NOTE(review): assumes CLAMP maps wrapped-negative indexes to 0x00 and overflowing ones to 0xFF - confirm against the table.
+    r[3] = CLAMP[((r[2] as base.u32) ~mod- delta) & 1023]
+    g[3] = CLAMP[((g[2] as base.u32) ~mod- delta) & 1023]
+    b[3] = CLAMP[((b[2] as base.u32) ~mod- delta) & 1023]
+
+    // Paint the 4×4 block.
+    // Bit x4y of args.bits is the low bit of a pixel's paint index; bit (x4y + 16) is the high bit.
+    while y < 4 {
+        x = 0
+        while x < 4,
+                inv y < 4,
+        {
+            x4y = (x * 4) | y
+            i = (((args.bits >> x4y) & 1) as base.u32) |
+                    (((args.bits >> (x4y + 15)) & 2) as base.u32)
+            o = args.offset + (x * 4) + (y * 1024)  // 4 bytes per step in x, 1024 bytes per step in y.
+            this.buffer[o + 0] = b[i]
+            this.buffer[o + 1] = g[i]
+            this.buffer[o + 2] = r[i]  // Byte o + 3 is not written here.
+            x += 1
+        }
+        y += 1
+    }
+}
+
+pri func decoder.decode_h_mode!(
+        bits: base.u64,
+        offset: base.u32[..= 1008]) {  // Decodes one 4×4 H-mode block (args.bits) into this.buffer at byte args.offset.
+    var r : array[4] base.u8  // r[i], g[i] and b[i] together form paint color i.
+    var g : array[4] base.u8
+    var b : array[4] base.u8
+
+    var rgb0  : base.u32  // The two decoded colors packed as 0xRRGGBB, for the ordering test.
+    var rgb2  : base.u32
+    var which : base.u32[..= 7]  // Index into T_H_MODIFIERS.
+    var delta : base.u32
+
+    var y   : base.u32
+    var x   : base.u32
+    var x4y : base.u32[..= 15]  // Bit position of pixel (x, y)'s low index bit.
+    var i   : base.u32[..= 3]  // Which paint color the pixel uses.
+    var o   : base.u32[..= 4092]  // Position in this.buffer of the pixel's first byte.
+
+    // Unjumble H-mode's 58 bits (one of the 'which' bits is implied by order)
+    // and convert from RGB444 to RGB888.
+    //
+    // 63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32  dec
+    // 3F 3E 3D 3C 3B 3A 39 38 37 36 35 34 33 32 31 30 2F 2E 2D 2C 2B 2A 29 28 27 26 25 24 23 22 21 20  hex
+    // .. R0 r0 r0 r0 G0 g0 g0 .. .. .. g0 B0 .. b0 b0 b0 R2 r2 r2 r2 G2 g2 g2 g2 B2 b2 b2 b2 Wh .. wh
+    //
+    // The low 32 bits hold "2 bits per pixel" indexes into the r, g, b arrays.
+
+    r[0] = (0x0F & (args.bits >> 0x3B)) as base.u8
+    r[0] = (r[0] << 4) | r[0]  // Replicate the nibble: 0xN becomes 0xNN.
+    g[0] = ((0x0E & (args.bits >> 0x37)) as base.u8) |
+            ((0x01 & (args.bits >> 0x34)) as base.u8)
+    g[0] = (g[0] << 4) | g[0]
+    b[0] = ((0x08 & (args.bits >> 0x30)) as base.u8) |
+            ((0x07 & (args.bits >> 0x2F)) as base.u8)
+    b[0] = (b[0] << 4) | b[0]
+
+    r[2] = (0x0F & (args.bits >> 0x2B)) as base.u8
+    r[2] = (r[2] << 4) | r[2]
+    g[2] = (0x0F & (args.bits >> 0x27)) as base.u8
+    g[2] = (g[2] << 4) | g[2]
+    b[2] = (0x0F & (args.bits >> 0x23)) as base.u8
+    b[2] = (b[2] << 4) | b[2]
+
+    // Modulate: each decoded color yields two paint colors, color + delta and color - delta.
+
+    rgb0 = ((r[0] as base.u32) << 0x10) |
+            ((g[0] as base.u32) << 0x08) |
+            ((b[0] as base.u32) << 0x00)
+    rgb2 = ((r[2] as base.u32) << 0x10) |
+            ((g[2] as base.u32) << 0x08) |
+            ((b[2] as base.u32) << 0x00)
+    which = ((0x04 & (args.bits >> 0x20)) as base.u32) |
+            ((0x02 & (args.bits >> 0x1F)) as base.u32)
+    if rgb0 >= rgb2 {
+        which |= 1  // The lowest 'which' bit is not stored; it comes from the ordering of the two colors.
+    }
+    delta = T_H_MODIFIERS[which] as base.u32
+    // Paint colors 1 and 3 are computed before 0 and 2, which are then overwritten in place.
+    r[1] = CLAMP[((r[0] as base.u32) ~mod- delta) & 1023]
+    g[1] = CLAMP[((g[0] as base.u32) ~mod- delta) & 1023]
+    b[1] = CLAMP[((b[0] as base.u32) ~mod- delta) & 1023]
+
+    r[0] = CLAMP[((r[0] as base.u32) ~mod+ delta) & 1023]
+    g[0] = CLAMP[((g[0] as base.u32) ~mod+ delta) & 1023]
+    b[0] = CLAMP[((b[0] as base.u32) ~mod+ delta) & 1023]
+    // The "& 1023" bounds each wrapping (~mod) result to CLAMP's 1024 entries.
+    r[3] = CLAMP[((r[2] as base.u32) ~mod- delta) & 1023]
+    g[3] = CLAMP[((g[2] as base.u32) ~mod- delta) & 1023]
+    b[3] = CLAMP[((b[2] as base.u32) ~mod- delta) & 1023]
+
+    r[2] = CLAMP[((r[2] as base.u32) ~mod+ delta) & 1023]
+    g[2] = CLAMP[((g[2] as base.u32) ~mod+ delta) & 1023]
+    b[2] = CLAMP[((b[2] as base.u32) ~mod+ delta) & 1023]
+
+    // Paint the 4×4 block.
+    // Bit x4y of args.bits is the low bit of a pixel's paint index; bit (x4y + 16) is the high bit.
+    while y < 4 {
+        x = 0
+        while x < 4,
+                inv y < 4,
+        {
+            x4y = (x * 4) | y
+            i = (((args.bits >> x4y) & 1) as base.u32) |
+                    (((args.bits >> (x4y + 15)) & 2) as base.u32)
+            o = args.offset + (x * 4) + (y * 1024)  // 4 bytes per step in x, 1024 bytes per step in y.
+            this.buffer[o + 0] = b[i]
+            this.buffer[o + 1] = g[i]
+            this.buffer[o + 2] = r[i]  // Byte o + 3 is not written here.
+            x += 1
+        }
+        y += 1
+    }
+}
+
+pri func decoder.decode_planar_mode!(
+        bits: base.u64,
+        offset: base.u32[..= 1008]) {  // Decodes one 4×4 Planar-mode block (args.bits) into this.buffer at byte args.offset.
+    var ro : base.u32  // Suffix o: the color that h and v are made relative to.
+    var go : base.u32
+    var bo : base.u32
+    var rh : base.u32  // Suffix h: the color whose weight grows with x.
+    var gh : base.u32
+    var bh : base.u32
+    var rv : base.u32  // Suffix v: the color whose weight grows with y.
+    var gv : base.u32
+    var bv : base.u32
+
+    var y  : base.u32
+    var x  : base.u32
+    var o  : base.u32[..= 4092]  // Position in this.buffer of the pixel's first byte.
+    var rp : base.u32  // Suffix p: the per-pixel sum before rounding and clamping.
+    var gp : base.u32
+    var bp : base.u32
+
+    // Unjumble Planar-mode's 57 bits and convert from RGB676 to RGB888.
+    //
+    // 63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32  dec
+    // 3F 3E 3D 3C 3B 3A 39 38 37 36 35 34 33 32 31 30 2F 2E 2D 2C 2B 2A 29 28 27 26 25 24 23 22 21 20  hex
+    // .. Ro ro ro ro ro ro Go .. go go go go go go Bo .. .. .. bo bo .. bo bo bo Rh rh rh rh rh .. rh
+    //
+    // 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0  dec
+    // 1F 1E 1D 1C 1B 1A 19 18 17 16 15 14 13 12 11 10 0F 0E 0D 0C 0B 0A 09 08 07 06 05 04 03 02 01 00  hex
+    // Gh gh gh gh gh gh gh Bh bh bh bh bh bh Rv rv rv rv rv rv Gv gv gv gv gv gv gv Bv bv bv bv bv bv
+
+    ro = (0x3F & (args.bits >> 0x39)) as base.u32
+    ro = (ro ~mod<< 2) | (ro >> 4)  // 6 bits to 8 bits by repeating the top 2 bits.
+    go = ((0x40 & (args.bits >> 0x32)) as base.u32) |
+            ((0x3F & (args.bits >> 0x31)) as base.u32)
+    go = (go ~mod<< 1) | (go >> 6)  // 7 bits to 8 bits by repeating the top bit.
+    bo = ((0x20 & (args.bits >> 0x2B)) as base.u32) |
+            ((0x18 & (args.bits >> 0x28)) as base.u32) |
+            ((0x07 & (args.bits >> 0x27)) as base.u32)
+    bo = (bo ~mod<< 2) | (bo >> 4)
+
+    rh = ((0x3E & (args.bits >> 0x21)) as base.u32) |
+            ((0x01 & (args.bits >> 0x20)) as base.u32)
+    rh = (rh ~mod<< 2) | (rh >> 4)
+    gh = (0x7F & (args.bits >> 0x19)) as base.u32
+    gh = (gh ~mod<< 1) | (gh >> 6)
+    bh = (0x3F & (args.bits >> 0x13)) as base.u32
+    bh = (bh ~mod<< 2) | (bh >> 4)
+
+    rv = (0x3F & (args.bits >> 0x0D)) as base.u32
+    rv = (rv ~mod<< 2) | (rv >> 4)
+    gv = (0x7F & (args.bits >> 0x06)) as base.u32
+    gv = (gv ~mod<< 1) | (gv >> 6)
+    bv = (0x3F & (args.bits >> 0x00)) as base.u32
+    bv = (bv ~mod<< 2) | (bv >> 4)
+
+    // Convert h and v colors from absolute to relative.
+    // A negative difference wraps around (~mod) rather than failing.
+    rh ~mod-= ro
+    gh ~mod-= go
+    bh ~mod-= bo
+
+    rv ~mod-= ro
+    gv ~mod-= go
+    bv ~mod-= bo
+
+    // Prepare to linearly interpolate (which involves dividing by 4).
+
+    ro ~mod*= 4
+    go ~mod*= 4
+    bo ~mod*= 4
+
+    // Paint the 4×4 block.
+    // Each channel is CLAMP[(((x * h) + (y * v) + (4 * o) + 2) / 4) & 1023]; the "+ 2" is half the divisor.
+    while y < 4 {
+        x = 0
+        while x < 4,
+                inv y < 4,
+        {
+            o = args.offset + (x * 4) + (y * 1024)  // 4 bytes per step in x, 1024 bytes per step in y.
+            bp = ((x ~mod* bh) ~mod+ (y ~mod* bv)) ~mod+ bo
+            this.buffer[o + 0] = CLAMP[((bp ~mod+ 2) / 4) & 1023]  // The "& 1023" bounds the index to CLAMP's 1024 entries.
+            gp = ((x ~mod* gh) ~mod+ (y ~mod* gv)) ~mod+ go
+            this.buffer[o + 1] = CLAMP[((gp ~mod+ 2) / 4) & 1023]
+            rp = ((x ~mod* rh) ~mod+ (y ~mod* rv)) ~mod+ ro
+            this.buffer[o + 2] = CLAMP[((rp ~mod+ 2) / 4) & 1023]  // Byte o + 3 is not written here.
+            x += 1
+        }
+        y += 1
+    }
+}
+
 pri func decoder.decode_half_block!(
         bits: base.u32,
         offset: base.u32[..= 1008],
@@ -507,6 +803,8 @@
         [0x0000_002F, 0x0000_00B7, 0xFFFF_FFD1, 0xFFFF_FF49],  // [-183, -47, 47, 183] re-ordered.
 ]
 
+pri const T_H_MODIFIERS : roarray[8] base.u8 = [3, 6, 11, 16, 23, 32, 41, 64]  // Per-channel deltas for decode_t_mode and decode_h_mode, indexed by 'which'.
+
 pri const CLAMP : roarray[1024] base.u8 = [
         0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
         0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
diff --git a/test/nia-checksums-of-data.txt b/test/nia-checksums-of-data.txt
index 2239e53..adedf77 100644
--- a/test/nia-checksums-of-data.txt
+++ b/test/nia-checksums-of-data.txt
@@ -26,6 +26,7 @@
 OK. e08a7cc8 test/data/artificial-png/key-value-pairs.png
 OK. 076cb375 test/data/bricks-color.bmp
 OK. bdbbfadb test/data/bricks-color.etc1.pkm
+OK. 5670f263 test/data/bricks-color.etc2.pkm
 OK. 72a1f9cc test/data/bricks-color.jpeg
 OK. 076cb375 test/data/bricks-color.lossless.webp
 OK. 076cb375 test/data/bricks-color.png