example/convert-to-nia: allow 16-bit PNG output
diff --git a/example/convert-to-nia/convert-to-nia.c b/example/convert-to-nia/convert-to-nia.c
index 43a1758..2247525 100644
--- a/example/convert-to-nia/convert-to-nia.c
+++ b/example/convert-to-nia/convert-to-nia.c
@@ -159,8 +159,6 @@
     "Using -16 produces 16 bits per channel. For NIA/NIE output, this is the\n"
     "\"bn8\" version-and-configuration in the spec.\n"
     "\n"
-    "Combining -u and -16 is unsupported.\n"
-    "\n"
     "The -fail-if-unsandboxed flag causes the program to exit if it does not\n"
     "self-impose a sandbox. On Linux, it self-imposes a SECCOMP_MODE_STRICT\n"
     "sandbox, regardless of whether this flag was set.";
@@ -318,8 +316,6 @@
 
   if (num_one_of > 1) {
     return g_usage;
-  } else if (g_flags.output_uncompressed_png && g_flags.bit_depth_16) {
-    return "main: combining -u and -16 is unsupported";
   }
   g_flags.output_nia_or_crc32_digest =
       (num_one_of == 0) || g_flags.output_crc32_digest;
@@ -787,16 +783,17 @@
 
 bool  //
 print_uncompressed_png_frame() {
-  if (g_flags.bit_depth_16) {
-    return false;
-  }
   uint32_t pixfmt = 0;
   if (g_pixfmt_is_gray) {
-    pixfmt = UNCOMPNG__PIXEL_FORMAT__YXXX;
+    pixfmt = g_flags.bit_depth_16 ? UNCOMPNG__PIXEL_FORMAT__YXXX_4X16LE
+                                  : UNCOMPNG__PIXEL_FORMAT__YXXX;
   } else if (wuffs_base__pixel_buffer__is_opaque(&g_pixbuf)) {
-    pixfmt = UNCOMPNG__PIXEL_FORMAT__BGRX;
+    pixfmt = g_flags.bit_depth_16 ? UNCOMPNG__PIXEL_FORMAT__BGRX_4X16LE
+                                  : UNCOMPNG__PIXEL_FORMAT__BGRX;
   } else {
-    pixfmt = UNCOMPNG__PIXEL_FORMAT__BGRA_NONPREMUL;
+    pixfmt = g_flags.bit_depth_16
+                 ? UNCOMPNG__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE
+                 : UNCOMPNG__PIXEL_FORMAT__BGRA_NONPREMUL;
   }
 
   uint32_t w = wuffs_base__pixel_config__width(&g_pixbuf.pixcfg);
diff --git a/lib/uncompng/uncompng.go b/lib/uncompng/uncompng.go
index 1c0a821..b3b0c74 100644
--- a/lib/uncompng/uncompng.go
+++ b/lib/uncompng/uncompng.go
@@ -51,25 +51,27 @@
 type ColorType byte
 
 const (
-	// ColorTypeGray means 1 byte per pixel.
+	// ColorTypeGray means 1 byte per pixel (or 2 for Depth16, big-endian).
 	//
-	// This matches Go's image.Gray.Pix layout.
+	// This matches Go's image.Gray.Pix (or Gray16, for Depth16) layout.
 	ColorTypeGray = ColorType(1)
 
-	// ColorTypeRGBX means 4 bytes per pixel. Red, Green, Blue and the 4th
-	// channel is ignored.
+	// ColorTypeRGBX means 4 bytes per pixel (or 8 for Depth16, big-endian).
+	// Red, Green, Blue and the 4th channel is ignored.
 	//
-	// This matches Go's image.RGBA.Pix and image.NRGBA.Pix layouts, provided
-	// that all of the pixels' alpha values are 0xFF.
+	// This matches Go's image.RGBA.Pix and image.NRGBA.Pix layouts (or RGBA64
+	// or NRGBA64, for Depth16), provided that all of the pixels' alpha values
+	// are 0xFF (or 0xFFFF, for Depth16).
 	ColorTypeRGBX = ColorType(2)
 
-	// ColorTypeRGBX means 4 bytes per pixel. Red, Green, Blue and Alpha. RGB
-	// uses non-premultiplied alpha.
+	// ColorTypeNRGBA means 4 bytes per pixel (or 8 for Depth16, big-endian).
+	// Red, Green, Blue and Alpha. RGB uses non-premultiplied alpha.
 	//
-	// This matches Go's image.NRGBA.Pix layout. If all of the pixels' alpha
-	// values are 0xFF then either ColorTypeRGBX or ColorTypeNRGBA will produce
-	// the same PNG output (in terms of pixels) but smaller (ColorTypeRGBX) or
-	// larger (ColorTypeNRGBA) output in terms of byte count.
+	// This matches Go's image.NRGBA.Pix layout (or NRGBA64, for Depth16). If
+	// all of the pixels' alpha values are 0xFF (or 0xFFFF, for Depth16) then
+	// either ColorTypeRGBX or ColorTypeNRGBA will produce the same PNG output
+	// (in terms of pixels) but smaller (ColorTypeRGBX) or larger
+	// (ColorTypeNRGBA) output in terms of byte count.
 	ColorTypeNRGBA = ColorType(3)
 )
 
@@ -87,13 +89,15 @@
 }
 
 // Depth is the number of bits per channel.
-//
-// This package only supports a depth of 8. In the future, it might also
-// support a depth of 16.
 type Depth byte
 
 const (
+	// Depth8 means one byte per channel.
 	Depth8 = Depth(8)
+
+	// Depth16 means two bytes per channel. Values are big-endian like the Go
+	// standard library's Gray16, RGBA64 and NRGBA64 image types.
+	Depth16 = Depth(16)
 )
 
 // Encoder is an opaque type that can convert a slice of pixel data to
@@ -189,11 +193,13 @@
 // Encode writes the pixel data to w. It makes no allocations above whatever
 // w.Write makes, if any.
 //
-// pix holds the pixel data, either 1 or 4 bytes per pixel depending on the
-// colorType. width and height are measured in pixels. stride is measured in
-// bytes. depth must be Depth8 although this might be relaxed in the future.
+// pix holds the pixel data, either 1 or 4 bytes per pixel (doubled for
+// Depth16) depending on the colorType. width and height are measured in
+// pixels. stride is measured in bytes. depth must be either Depth8 or Depth16.
 func (e *Encoder) Encode(w io.Writer, pix []byte, width int, height int, stride int, depth Depth, colorType ColorType) error {
-	if (width < 0) || (height < 0) || (depth != Depth8) || (colorType.pngFileFormatEncoding() == 0xFF) {
+	if (width < 0) || (height < 0) ||
+		((depth != Depth8) && (depth != Depth16)) ||
+		(colorType.pngFileFormatEncoding() == 0xFF) {
 		return errors.New("uncompng: invalid argument")
 	} else if (width > 0xFFFFFF) || (height > 0xFFFFFF) {
 		return errors.New("uncompng: unsupported image size")
@@ -214,8 +220,8 @@
 
 		row := pix[y*stride:]
 
-		switch colorType {
-		case ColorTypeGray:
+		switch ColorType(depth) | colorType {
+		case 0x08 | ColorTypeGray:
 			row = row[:1*width]
 			for x := 0; x < width; x++ {
 				if (ej + 1) > ejMax {
@@ -229,7 +235,7 @@
 				row = row[1:]
 			}
 
-		case ColorTypeRGBX:
+		case 0x08 | ColorTypeRGBX:
 			row = row[:4*width]
 			for x := 0; x < width; x++ {
 				if (ej + 3) > ejMax {
@@ -245,7 +251,7 @@
 				row = row[4:]
 			}
 
-		case ColorTypeNRGBA:
+		case 0x08 | ColorTypeNRGBA:
 			row = row[:4*width]
 			for x := 0; x < width; x++ {
 				if (ej + 4) > ejMax {
@@ -261,6 +267,61 @@
 				ej += 4
 				row = row[4:]
 			}
+
+		case 0x10 | ColorTypeGray:
+			row = row[:2*width]
+			for x := 0; x < width; x++ {
+				if (ej + 2) > ejMax {
+					if err := e.flush(w, ej, false); err != nil {
+						return err
+					}
+					ej = eiLater
+				}
+				e.buf[ej+0] = row[0]
+				e.buf[ej+1] = row[1]
+				ej += 2
+				row = row[2:]
+			}
+
+		case 0x10 | ColorTypeRGBX:
+			row = row[:8*width]
+			for x := 0; x < width; x++ {
+				if (ej + 6) > ejMax {
+					if err := e.flush(w, ej, false); err != nil {
+						return err
+					}
+					ej = eiLater
+				}
+				e.buf[ej+0] = row[0]
+				e.buf[ej+1] = row[1]
+				e.buf[ej+2] = row[2]
+				e.buf[ej+3] = row[3]
+				e.buf[ej+4] = row[4]
+				e.buf[ej+5] = row[5]
+				ej += 6
+				row = row[8:]
+			}
+
+		case 0x10 | ColorTypeNRGBA:
+			row = row[:8*width]
+			for x := 0; x < width; x++ {
+				if (ej + 8) > ejMax {
+					if err := e.flush(w, ej, false); err != nil {
+						return err
+					}
+					ej = eiLater
+				}
+				e.buf[ej+0] = row[0]
+				e.buf[ej+1] = row[1]
+				e.buf[ej+2] = row[2]
+				e.buf[ej+3] = row[3]
+				e.buf[ej+4] = row[4]
+				e.buf[ej+5] = row[5]
+				e.buf[ej+6] = row[6]
+				e.buf[ej+7] = row[7]
+				ej += 8
+				row = row[8:]
+			}
 		}
 	}
 
diff --git a/lib/uncompng/uncompng_test.go b/lib/uncompng/uncompng_test.go
index 7723e68..7375f33 100644
--- a/lib/uncompng/uncompng_test.go
+++ b/lib/uncompng/uncompng_test.go
@@ -28,17 +28,32 @@
 	case *image.Gray:
 		return e.Encode(w, src.Pix, b.Dx(), b.Dy(), src.Stride, Depth8, ColorTypeGray)
 
+	case *image.Gray16:
+		return e.Encode(w, src.Pix, b.Dx(), b.Dy(), src.Stride, Depth16, ColorTypeGray)
+
 	case *image.RGBA:
 		if src.Opaque() {
 			return e.Encode(w, src.Pix, b.Dx(), b.Dy(), src.Stride, Depth8, ColorTypeRGBX)
 		}
 
+	case *image.RGBA64:
+		if src.Opaque() {
+			return e.Encode(w, src.Pix, b.Dx(), b.Dy(), src.Stride, Depth16, ColorTypeRGBX)
+		}
+
 	case *image.NRGBA:
 		if src.Opaque() {
 			return e.Encode(w, src.Pix, b.Dx(), b.Dy(), src.Stride, Depth8, ColorTypeRGBX)
 		} else {
 			return e.Encode(w, src.Pix, b.Dx(), b.Dy(), src.Stride, Depth8, ColorTypeNRGBA)
 		}
+
+	case *image.NRGBA64:
+		if src.Opaque() {
+			return e.Encode(w, src.Pix, b.Dx(), b.Dy(), src.Stride, Depth16, ColorTypeRGBX)
+		} else {
+			return e.Encode(w, src.Pix, b.Dx(), b.Dy(), src.Stride, Depth16, ColorTypeNRGBA)
+		}
 	}
 
 	tmp := image.NewNRGBA(b)
@@ -50,16 +65,24 @@
 	switch m := m.(type) {
 	case *image.Gray:
 		return m.Pix
+	case *image.Gray16:
+		return m.Pix
 	case *image.RGBA:
 		return m.Pix
+	case *image.RGBA64:
+		return m.Pix
 	case *image.NRGBA:
 		return m.Pix
+	case *image.NRGBA64:
+		return m.Pix
 	}
 	return nil
 }
 
 func TestRoundTrip(tt *testing.T) {
 	testCases := []string{
+		"36.png",
+		"49.png",
 		"bricks-color.png",
 		"bricks-gray.png",
 		"harvesters.png",
diff --git a/snippet/uncompng.c b/snippet/uncompng.c
index d175054..c6b23e5 100644
--- a/snippet/uncompng.c
+++ b/snippet/uncompng.c
@@ -36,16 +36,24 @@
 #include <stddef.h>
 #include <stdint.h>
 
+// clang-format off
+
 // UNCOMPNG__PIXEL_FORMAT__ETC are the valid pixel_format values to pass to
 // uncompng__encode.
 //
 // These constants' values are the same as the corresponding Wuffs definitions,
 // after replacing the name's "WUFFS_BASE" prefix with "UNCOMPNG". This file is
 // stand-alone. It does not #include any Wuffs code.
-#define UNCOMPNG__PIXEL_FORMAT__Y 0x20000008
-#define UNCOMPNG__PIXEL_FORMAT__YXXX 0x30008888
-#define UNCOMPNG__PIXEL_FORMAT__BGRA_NONPREMUL 0x81008888
-#define UNCOMPNG__PIXEL_FORMAT__BGRX 0x90008888
+#define UNCOMPNG__PIXEL_FORMAT__Y                        0x20000008
+#define UNCOMPNG__PIXEL_FORMAT__Y_16LE                   0x2000000B
+#define UNCOMPNG__PIXEL_FORMAT__YXXX                     0x30008888
+#define UNCOMPNG__PIXEL_FORMAT__YXXX_4X16LE              0x3000BBBB
+#define UNCOMPNG__PIXEL_FORMAT__BGRA_NONPREMUL           0x81008888
+#define UNCOMPNG__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE    0x8100BBBB
+#define UNCOMPNG__PIXEL_FORMAT__BGRX                     0x90008888
+#define UNCOMPNG__PIXEL_FORMAT__BGRX_4X16LE              0x9000BBBB
+
+// clang-format on
 
 // UNCOMPNG__RESULT__ETC can be returned by uncompng__encode. write_func can
 // also return its own negative error codes, which are passed on.
@@ -185,23 +193,40 @@
   uncompng__private_impl_buffer[0x0015] = (uint8_t)(height >> 16);
   uncompng__private_impl_buffer[0x0016] = (uint8_t)(height >> 8);
   uncompng__private_impl_buffer[0x0017] = (uint8_t)(height >> 0);
-  uncompng__private_impl_buffer[0x0018] = 8;
 
+  uint8_t depth;
   uint8_t color_type;
   switch (pixel_format) {
     case UNCOMPNG__PIXEL_FORMAT__Y:
     case UNCOMPNG__PIXEL_FORMAT__YXXX:
+      depth = 8;
       color_type = 0;
       break;
     case UNCOMPNG__PIXEL_FORMAT__BGRA_NONPREMUL:
+      depth = 8;
       color_type = 6;
       break;
     case UNCOMPNG__PIXEL_FORMAT__BGRX:
+      depth = 8;
+      color_type = 2;
+      break;
+    case UNCOMPNG__PIXEL_FORMAT__Y_16LE:
+    case UNCOMPNG__PIXEL_FORMAT__YXXX_4X16LE:
+      depth = 16;
+      color_type = 0;
+      break;
+    case UNCOMPNG__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
+      depth = 16;
+      color_type = 6;
+      break;
+    case UNCOMPNG__PIXEL_FORMAT__BGRX_4X16LE:
+      depth = 16;
       color_type = 2;
       break;
     default:
       return;
   }
+  uncompng__private_impl_buffer[0x0018] = depth;
   uncompng__private_impl_buffer[0x0019] = color_type;
   uncompng__private_impl_buffer[0x001A] = 0;
   uncompng__private_impl_buffer[0x001B] = 0;
@@ -413,6 +438,22 @@
         }
         break;
 
+      case UNCOMPNG__PIXEL_FORMAT__Y_16LE:
+        for (uint32_t x = 0; x < width; x++) {
+          if ((ej + 2) > ej_max) {
+            int err =
+                uncompng__private_impl_flush(write_func, context, ej, false);
+            if (err != 0) {
+              return err;
+            }
+            ej = ei_later;
+          }
+          uncompng__private_impl_buffer[ej++] = row[1];
+          uncompng__private_impl_buffer[ej++] = row[0];
+          row += 2;
+        }
+        break;
+
       case UNCOMPNG__PIXEL_FORMAT__YXXX:
         for (uint32_t x = 0; x < width; x++) {
           if ((ej + 1) > ej_max) {
@@ -428,6 +469,22 @@
         }
         break;
 
+      case UNCOMPNG__PIXEL_FORMAT__YXXX_4X16LE:
+        for (uint32_t x = 0; x < width; x++) {
+          if ((ej + 2) > ej_max) {
+            int err =
+                uncompng__private_impl_flush(write_func, context, ej, false);
+            if (err != 0) {
+              return err;
+            }
+            ej = ei_later;
+          }
+          uncompng__private_impl_buffer[ej++] = row[1];
+          uncompng__private_impl_buffer[ej++] = row[0];
+          row += 8;
+        }
+        break;
+
       case UNCOMPNG__PIXEL_FORMAT__BGRA_NONPREMUL:
         for (uint32_t x = 0; x < width; x++) {
           if ((ej + 4) > ej_max) {
@@ -446,6 +503,28 @@
         }
         break;
 
+      case UNCOMPNG__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
+        for (uint32_t x = 0; x < width; x++) {
+          if ((ej + 8) > ej_max) {
+            int err =
+                uncompng__private_impl_flush(write_func, context, ej, false);
+            if (err != 0) {
+              return err;
+            }
+            ej = ei_later;
+          }
+          uncompng__private_impl_buffer[ej++] = row[5];
+          uncompng__private_impl_buffer[ej++] = row[4];
+          uncompng__private_impl_buffer[ej++] = row[3];
+          uncompng__private_impl_buffer[ej++] = row[2];
+          uncompng__private_impl_buffer[ej++] = row[1];
+          uncompng__private_impl_buffer[ej++] = row[0];
+          uncompng__private_impl_buffer[ej++] = row[7];
+          uncompng__private_impl_buffer[ej++] = row[6];
+          row += 8;
+        }
+        break;
+
       case UNCOMPNG__PIXEL_FORMAT__BGRX:
         for (uint32_t x = 0; x < width; x++) {
           if ((ej + 3) > ej_max) {
@@ -463,6 +542,26 @@
         }
         break;
 
+      case UNCOMPNG__PIXEL_FORMAT__BGRX_4X16LE:
+        for (uint32_t x = 0; x < width; x++) {
+          if ((ej + 6) > ej_max) {
+            int err =
+                uncompng__private_impl_flush(write_func, context, ej, false);
+            if (err != 0) {
+              return err;
+            }
+            ej = ei_later;
+          }
+          uncompng__private_impl_buffer[ej++] = row[5];
+          uncompng__private_impl_buffer[ej++] = row[4];
+          uncompng__private_impl_buffer[ej++] = row[3];
+          uncompng__private_impl_buffer[ej++] = row[2];
+          uncompng__private_impl_buffer[ej++] = row[1];
+          uncompng__private_impl_buffer[ej++] = row[0];
+          row += 8;
+        }
+        break;
+
       default:
         return UNCOMPNG__RESULT__INVALID_ARGUMENT;
     }
@@ -490,11 +589,19 @@
     case UNCOMPNG__PIXEL_FORMAT__Y:
       bytes_per_pixel = 1u;
       break;
+    case UNCOMPNG__PIXEL_FORMAT__Y_16LE:
+      bytes_per_pixel = 2u;
+      break;
     case UNCOMPNG__PIXEL_FORMAT__YXXX:
     case UNCOMPNG__PIXEL_FORMAT__BGRA_NONPREMUL:
     case UNCOMPNG__PIXEL_FORMAT__BGRX:
       bytes_per_pixel = 4u;
       break;
+    case UNCOMPNG__PIXEL_FORMAT__YXXX_4X16LE:
+    case UNCOMPNG__PIXEL_FORMAT__BGRA_NONPREMUL_4X16LE:
+    case UNCOMPNG__PIXEL_FORMAT__BGRX_4X16LE:
+      bytes_per_pixel = 8u;
+      break;
     default:
       return UNCOMPNG__RESULT__INVALID_ARGUMENT;
   }
diff --git a/test/data/36.png b/test/data/36.png
new file mode 100644
index 0000000..27655d1
--- /dev/null
+++ b/test/data/36.png
Binary files differ
diff --git a/test/data/49.png b/test/data/49.png
new file mode 100644
index 0000000..7681b0a
--- /dev/null
+++ b/test/data/49.png
Binary files differ
diff --git a/test/data/README.md b/test/data/README.md
index 284a8bd..4c7173b 100644
--- a/test/data/README.md
+++ b/test/data/README.md
@@ -38,6 +38,9 @@
 
 ---
 
+`36.png` and `49.png` are simple, artificially generated images. The generation
+script is `gen-36-49.go` from https://github.com/nigeltao/etc2
+
 `DCI-P3-D65.icc` comes from
 [color.org](https://www.color.org/chardata/rgb/DCIP3.xalter).
 `DCI-P3-D65.icc.zlib` is a zlib-compresion of that, created by Go's standard
diff --git a/test/nia-checksums-of-data.txt b/test/nia-checksums-of-data.txt
index 196d36b..7429c4e 100644
--- a/test/nia-checksums-of-data.txt
+++ b/test/nia-checksums-of-data.txt
@@ -1,4 +1,6 @@
 # Generated by script/print-nia-checksums.sh
+OK. 9720c028 test/data/36.png
+OK. f2a0a3f6 test/data/49.png
 OK. d3bb0beb test/data/DCI-P3-D65.icc
 OK. 646e081a test/data/animated-red-blue.000000.nie
 OK. 181c6916 test/data/animated-red-blue.000001.nie