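Remove wuffs_base__global__null_io_buffer

Drop the file-scope "null" io_buffer and its wuffs_base__null_io_reader and
wuffs_base__null_io_writer accessors. The C generator now handles the
utility null_io_reader and null_io_writer methods itself: on first use in a
function it emits a local "empty_io_buffer", initialized from
wuffs_base__null_io_buffer(), into that function's prologue and passes
"&empty_io_buffer" at the call site instead.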
diff --git a/internal/cgen/base/io-private.h b/internal/cgen/base/io-private.h
index fad529b..6aeb0fa 100644
--- a/internal/cgen/base/io-private.h
+++ b/internal/cgen/base/io-private.h
@@ -16,21 +16,6 @@
 
 // ---------------- I/O
 
-// "Null" as in "/dev/null", not as in "nullptr".
-//
-// TODO: ensure that this is zero-initialized.
-static wuffs_base__io_buffer wuffs_base__global__null_io_buffer;
-
-static inline wuffs_base__io_buffer*  //
-wuffs_base__null_io_reader() {
-  return &wuffs_base__global__null_io_buffer;
-}
-
-static inline wuffs_base__io_buffer*  //
-wuffs_base__null_io_writer() {
-  return &wuffs_base__global__null_io_buffer;
-}
-
 static inline uint64_t  //
 wuffs_base__io__count_since(uint64_t mark, uint64_t index) {
   if (index >= mark) {
diff --git a/internal/cgen/builtin.go b/internal/cgen/builtin.go
index 7a9a787..251ca1e 100644
--- a/internal/cgen/builtin.go
+++ b/internal/cgen/builtin.go
@@ -94,6 +94,17 @@
 			return g.writeBuiltinIOReader(b, recv, method.Ident(), n.Args(), depth)
 		case t.IDIOWriter:
 			return g.writeBuiltinIOWriter(b, recv, method.Ident(), n.Args(), depth)
+		case t.IDUtility:
+			switch method.Ident() {
+			case t.IDNullIOReader, t.IDNullIOWriter:
+				if !g.currFunk.usesEmptyIOBuffer {
+					g.currFunk.usesEmptyIOBuffer = true
+					g.currFunk.bPrologue.writes("wuffs_base__io_buffer empty_io_buffer = " +
+						"wuffs_base__null_io_buffer();\n\n")
+				}
+				b.writes("&empty_io_buffer")
+				return nil
+			}
 		}
 	}
 	return errNoSuchBuiltin
diff --git a/internal/cgen/data.go b/internal/cgen/data.go
index 09121da..a06de8e 100644
--- a/internal/cgen/data.go
+++ b/internal/cgen/data.go
@@ -163,13 +163,12 @@
 	""
 
 const baseIOPrivateH = "" +
-	"// ---------------- I/O\n\n// \"Null\" as in \"/dev/null\", not as in \"nullptr\".\n//\n// TODO: ensure that this is zero-initialized.\nstatic wuffs_base__io_buffer wuffs_base__global__null_io_buffer;\n\nstatic inline wuffs_base__io_buffer*  //\nwuffs_base__null_io_reader() {\n  return &wuffs_base__global__null_io_buffer;\n}\n\nstatic inline wuffs_base__io_buffer*  //\nwuffs_base__null_io_writer() {\n  return &wuffs_base__global__null_io_buffer;\n}\n\nstatic inline uint64_t  //\nwuffs_base__io__count_since(uint64_t mark, uint64_t index) {\n  if (index >= mark) {\n    return index - mark;\n  }\n  return 0;\n}\n\nstatic inline wuffs_base__slice_u8  //\nwuffs_base__io__since(uint64_t mark, uint64_t index, uint8_t* ptr) {\n  if (index >= mark) {\n    return wuffs_base__make_slice_u8(ptr + mark, index - mark);\n  }\n  return wuffs_base__make_slice_u8(NULL, 0);\n}\n\nstatic inline uint32_t  //\nwuffs_base__io_writer__copy_n_from_history(uint8_t** ptr_iop_w,\n                                           uint8_t* io1_w,\n                                       " +
-	"    uint8_t* io2_w,\n                                           uint32_t length,\n                                           uint32_t distance) {\n  if (!distance) {\n    return 0;\n  }\n  uint8_t* p = *ptr_iop_w;\n  if ((size_t)(p - io1_w) < (size_t)(distance)) {\n    return 0;\n  }\n  uint8_t* q = p - distance;\n  size_t n = (size_t)(io2_w - p);\n  if ((size_t)(length) > n) {\n    length = (uint32_t)(n);\n  } else {\n    n = (size_t)(length);\n  }\n  // TODO: unrolling by 3 seems best for the std/deflate benchmarks, but that\n  // is mostly because 3 is the minimum length for the deflate format. This\n  // function implementation shouldn't overfit to that one format. Perhaps the\n  // copy_n_from_history Wuffs method should also take an unroll hint argument,\n  // and the cgen can look if that argument is the constant expression '3'.\n  //\n  // See also wuffs_base__io_writer__copy_n_from_history_fast below.\n  //\n  // Alternatively, or additionally, have a sloppy_copy_n_from_history method\n  // that copies 8 bytes at a time, poss" +
-	"ibly writing more than length bytes?\n  for (; n >= 3; n -= 3) {\n    *p++ = *q++;\n    *p++ = *q++;\n    *p++ = *q++;\n  }\n  for (; n; n--) {\n    *p++ = *q++;\n  }\n  *ptr_iop_w = p;\n  return length;\n}\n\n// wuffs_base__io_writer__copy_n_from_history_fast is like the\n// wuffs_base__io_writer__copy_n_from_history function above, but has stronger\n// pre-conditions. The caller needs to prove that:\n//  - distance >  0\n//  - distance <= (*ptr_iop_w - io1_w)\n//  - length   <= (io2_w      - *ptr_iop_w)\nstatic inline uint32_t  //\nwuffs_base__io_writer__copy_n_from_history_fast(uint8_t** ptr_iop_w,\n                                                uint8_t* io1_w,\n                                                uint8_t* io2_w,\n                                                uint32_t length,\n                                                uint32_t distance) {\n  uint8_t* p = *ptr_iop_w;\n  uint8_t* q = p - distance;\n  uint32_t n = length;\n  for (; n >= 3; n -= 3) {\n    *p++ = *q++;\n    *p++ = *q++;\n    *p++ = *q++;\n  }\n  for (; n; " +
-	"n--) {\n    *p++ = *q++;\n  }\n  *ptr_iop_w = p;\n  return length;\n}\n\nstatic inline uint32_t  //\nwuffs_base__io_writer__copy_n_from_reader(uint8_t** ptr_iop_w,\n                                          uint8_t* io2_w,\n                                          uint32_t length,\n                                          uint8_t** ptr_iop_r,\n                                          uint8_t* io2_r) {\n  uint8_t* iop_w = *ptr_iop_w;\n  size_t n = length;\n  if (n > ((size_t)(io2_w - iop_w))) {\n    n = (size_t)(io2_w - iop_w);\n  }\n  uint8_t* iop_r = *ptr_iop_r;\n  if (n > ((size_t)(io2_r - iop_r))) {\n    n = (size_t)(io2_r - iop_r);\n  }\n  if (n > 0) {\n    memmove(iop_w, iop_r, n);\n    *ptr_iop_w += n;\n    *ptr_iop_r += n;\n  }\n  return (uint32_t)(n);\n}\n\nstatic inline uint64_t  //\nwuffs_base__io_writer__copy_from_slice(uint8_t** ptr_iop_w,\n                                       uint8_t* io2_w,\n                                       wuffs_base__slice_u8 src) {\n  uint8_t* iop_w = *ptr_iop_w;\n  size_t n = src.len;\n  if (n > ((s" +
-	"ize_t)(io2_w - iop_w))) {\n    n = (size_t)(io2_w - iop_w);\n  }\n  if (n > 0) {\n    memmove(iop_w, src.ptr, n);\n    *ptr_iop_w += n;\n  }\n  return (uint64_t)(n);\n}\n\nstatic inline uint32_t  //\nwuffs_base__io_writer__copy_n_from_slice(uint8_t** ptr_iop_w,\n                                         uint8_t* io2_w,\n                                         uint32_t length,\n                                         wuffs_base__slice_u8 src) {\n  uint8_t* iop_w = *ptr_iop_w;\n  size_t n = src.len;\n  if (n > length) {\n    n = length;\n  }\n  if (n > ((size_t)(io2_w - iop_w))) {\n    n = (size_t)(io2_w - iop_w);\n  }\n  if (n > 0) {\n    memmove(iop_w, src.ptr, n);\n    *ptr_iop_w += n;\n  }\n  return (uint32_t)(n);\n}\n\nstatic inline wuffs_base__io_buffer*  //\nwuffs_base__io_reader__set(wuffs_base__io_buffer* b,\n                           uint8_t** ptr_iop_r,\n                           uint8_t** ptr_io0_r,\n                           uint8_t** ptr_io1_r,\n                           uint8_t** ptr_io2_r,\n                           wuffs_ba" +
-	"se__slice_u8 data) {\n  b->data = data;\n  b->meta.wi = data.len;\n  b->meta.ri = 0;\n  b->meta.pos = 0;\n  b->meta.closed = false;\n\n  *ptr_iop_r = data.ptr;\n  *ptr_io0_r = data.ptr;\n  *ptr_io1_r = data.ptr;\n  *ptr_io2_r = data.ptr + data.len;\n\n  return b;\n}\n\nstatic inline wuffs_base__slice_u8  //\nwuffs_base__io_reader__take(uint8_t** ptr_iop_r, uint8_t* io2_r, uint64_t n) {\n  if (n <= ((size_t)(io2_r - *ptr_iop_r))) {\n    uint8_t* p = *ptr_iop_r;\n    *ptr_iop_r += n;\n    return wuffs_base__make_slice_u8(p, n);\n  }\n  return wuffs_base__make_slice_u8(NULL, 0);\n}\n\nstatic inline wuffs_base__io_buffer*  //\nwuffs_base__io_writer__set(wuffs_base__io_buffer* b,\n                           uint8_t** ptr_iop_w,\n                           uint8_t** ptr_io0_w,\n                           uint8_t** ptr_io1_w,\n                           uint8_t** ptr_io2_w,\n                           wuffs_base__slice_u8 data) {\n  b->data = data;\n  b->meta.wi = 0;\n  b->meta.ri = 0;\n  b->meta.pos = 0;\n  b->meta.closed = false;\n\n  *ptr_iop_w = dat" +
-	"a.ptr;\n  *ptr_io0_w = data.ptr;\n  *ptr_io1_w = data.ptr;\n  *ptr_io2_w = data.ptr + data.len;\n\n  return b;\n}\n\n" +
+	"// ---------------- I/O\n\nstatic inline uint64_t  //\nwuffs_base__io__count_since(uint64_t mark, uint64_t index) {\n  if (index >= mark) {\n    return index - mark;\n  }\n  return 0;\n}\n\nstatic inline wuffs_base__slice_u8  //\nwuffs_base__io__since(uint64_t mark, uint64_t index, uint8_t* ptr) {\n  if (index >= mark) {\n    return wuffs_base__make_slice_u8(ptr + mark, index - mark);\n  }\n  return wuffs_base__make_slice_u8(NULL, 0);\n}\n\nstatic inline uint32_t  //\nwuffs_base__io_writer__copy_n_from_history(uint8_t** ptr_iop_w,\n                                           uint8_t* io1_w,\n                                           uint8_t* io2_w,\n                                           uint32_t length,\n                                           uint32_t distance) {\n  if (!distance) {\n    return 0;\n  }\n  uint8_t* p = *ptr_iop_w;\n  if ((size_t)(p - io1_w) < (size_t)(distance)) {\n    return 0;\n  }\n  uint8_t* q = p - distance;\n  size_t n = (size_t)(io2_w - p);\n  if ((size_t)(length) > n) {\n    length = (uint32_t)(n);\n  } else {\n" +
+	"    n = (size_t)(length);\n  }\n  // TODO: unrolling by 3 seems best for the std/deflate benchmarks, but that\n  // is mostly because 3 is the minimum length for the deflate format. This\n  // function implementation shouldn't overfit to that one format. Perhaps the\n  // copy_n_from_history Wuffs method should also take an unroll hint argument,\n  // and the cgen can look if that argument is the constant expression '3'.\n  //\n  // See also wuffs_base__io_writer__copy_n_from_history_fast below.\n  //\n  // Alternatively, or additionally, have a sloppy_copy_n_from_history method\n  // that copies 8 bytes at a time, possibly writing more than length bytes?\n  for (; n >= 3; n -= 3) {\n    *p++ = *q++;\n    *p++ = *q++;\n    *p++ = *q++;\n  }\n  for (; n; n--) {\n    *p++ = *q++;\n  }\n  *ptr_iop_w = p;\n  return length;\n}\n\n// wuffs_base__io_writer__copy_n_from_history_fast is like the\n// wuffs_base__io_writer__copy_n_from_history function above, but has stronger\n// pre-conditions. The caller needs to prove that:\n//  - distance >  " +
+	"0\n//  - distance <= (*ptr_iop_w - io1_w)\n//  - length   <= (io2_w      - *ptr_iop_w)\nstatic inline uint32_t  //\nwuffs_base__io_writer__copy_n_from_history_fast(uint8_t** ptr_iop_w,\n                                                uint8_t* io1_w,\n                                                uint8_t* io2_w,\n                                                uint32_t length,\n                                                uint32_t distance) {\n  uint8_t* p = *ptr_iop_w;\n  uint8_t* q = p - distance;\n  uint32_t n = length;\n  for (; n >= 3; n -= 3) {\n    *p++ = *q++;\n    *p++ = *q++;\n    *p++ = *q++;\n  }\n  for (; n; n--) {\n    *p++ = *q++;\n  }\n  *ptr_iop_w = p;\n  return length;\n}\n\nstatic inline uint32_t  //\nwuffs_base__io_writer__copy_n_from_reader(uint8_t** ptr_iop_w,\n                                          uint8_t* io2_w,\n                                          uint32_t length,\n                                          uint8_t** ptr_iop_r,\n                                          uint8_t* io2_r) {\n  uint8_t* i" +
+	"op_w = *ptr_iop_w;\n  size_t n = length;\n  if (n > ((size_t)(io2_w - iop_w))) {\n    n = (size_t)(io2_w - iop_w);\n  }\n  uint8_t* iop_r = *ptr_iop_r;\n  if (n > ((size_t)(io2_r - iop_r))) {\n    n = (size_t)(io2_r - iop_r);\n  }\n  if (n > 0) {\n    memmove(iop_w, iop_r, n);\n    *ptr_iop_w += n;\n    *ptr_iop_r += n;\n  }\n  return (uint32_t)(n);\n}\n\nstatic inline uint64_t  //\nwuffs_base__io_writer__copy_from_slice(uint8_t** ptr_iop_w,\n                                       uint8_t* io2_w,\n                                       wuffs_base__slice_u8 src) {\n  uint8_t* iop_w = *ptr_iop_w;\n  size_t n = src.len;\n  if (n > ((size_t)(io2_w - iop_w))) {\n    n = (size_t)(io2_w - iop_w);\n  }\n  if (n > 0) {\n    memmove(iop_w, src.ptr, n);\n    *ptr_iop_w += n;\n  }\n  return (uint64_t)(n);\n}\n\nstatic inline uint32_t  //\nwuffs_base__io_writer__copy_n_from_slice(uint8_t** ptr_iop_w,\n                                         uint8_t* io2_w,\n                                         uint32_t length,\n                                         w" +
+	"uffs_base__slice_u8 src) {\n  uint8_t* iop_w = *ptr_iop_w;\n  size_t n = src.len;\n  if (n > length) {\n    n = length;\n  }\n  if (n > ((size_t)(io2_w - iop_w))) {\n    n = (size_t)(io2_w - iop_w);\n  }\n  if (n > 0) {\n    memmove(iop_w, src.ptr, n);\n    *ptr_iop_w += n;\n  }\n  return (uint32_t)(n);\n}\n\nstatic inline wuffs_base__io_buffer*  //\nwuffs_base__io_reader__set(wuffs_base__io_buffer* b,\n                           uint8_t** ptr_iop_r,\n                           uint8_t** ptr_io0_r,\n                           uint8_t** ptr_io1_r,\n                           uint8_t** ptr_io2_r,\n                           wuffs_base__slice_u8 data) {\n  b->data = data;\n  b->meta.wi = data.len;\n  b->meta.ri = 0;\n  b->meta.pos = 0;\n  b->meta.closed = false;\n\n  *ptr_iop_r = data.ptr;\n  *ptr_io0_r = data.ptr;\n  *ptr_io1_r = data.ptr;\n  *ptr_io2_r = data.ptr + data.len;\n\n  return b;\n}\n\nstatic inline wuffs_base__slice_u8  //\nwuffs_base__io_reader__take(uint8_t** ptr_iop_r, uint8_t* io2_r, uint64_t n) {\n  if (n <= ((size_t)(io2_r - *ptr_i" +
+	"op_r))) {\n    uint8_t* p = *ptr_iop_r;\n    *ptr_iop_r += n;\n    return wuffs_base__make_slice_u8(p, n);\n  }\n  return wuffs_base__make_slice_u8(NULL, 0);\n}\n\nstatic inline wuffs_base__io_buffer*  //\nwuffs_base__io_writer__set(wuffs_base__io_buffer* b,\n                           uint8_t** ptr_iop_w,\n                           uint8_t** ptr_io0_w,\n                           uint8_t** ptr_io1_w,\n                           uint8_t** ptr_io2_w,\n                           wuffs_base__slice_u8 data) {\n  b->data = data;\n  b->meta.wi = 0;\n  b->meta.ri = 0;\n  b->meta.pos = 0;\n  b->meta.closed = false;\n\n  *ptr_iop_w = data.ptr;\n  *ptr_io0_w = data.ptr;\n  *ptr_io1_w = data.ptr;\n  *ptr_io2_w = data.ptr + data.len;\n\n  return b;\n}\n\n" +
 	"" +
 	"// ---------------- I/O (Utility)\n\n#define wuffs_base__utility__null_io_reader wuffs_base__null_io_reader\n#define wuffs_base__utility__null_io_writer wuffs_base__null_io_writer\n" +
 	""
diff --git a/internal/cgen/func.go b/internal/cgen/func.go
index 3f8daf5..de89009 100644
--- a/internal/cgen/func.go
+++ b/internal/cgen/func.go
@@ -34,16 +34,17 @@
 	coroID        uint32
 	returnsStatus bool
 
-	varList       []*a.Var
-	varResumables map[t.ID]bool
-	derivedVars   map[t.ID]struct{}
-	jumpTargets   map[a.Loop]uint32
-	coroSuspPoint uint32
-	ioBinds       uint32
-	tempW         uint32
-	tempR         uint32
-	usesScratch   bool
-	hasGotoOK     bool
+	varList           []*a.Var
+	varResumables     map[t.ID]bool
+	derivedVars       map[t.ID]struct{}
+	jumpTargets       map[a.Loop]uint32
+	coroSuspPoint     uint32
+	ioBinds           uint32
+	tempW             uint32
+	tempR             uint32
+	usesEmptyIOBuffer bool
+	usesScratch       bool
+	hasGotoOK         bool
 }
 
 func (k *funk) jumpTarget(n a.Loop) (uint32, error) {
diff --git a/lang/token/list.go b/lang/token/list.go
index 765ef80..d4918c0 100644
--- a/lang/token/list.go
+++ b/lang/token/list.go
@@ -436,13 +436,15 @@
 	IDU32 = ID(0x116)
 	IDU64 = ID(0x117)
 
-	IDBase        = ID(0x120)
-	IDBool        = ID(0x121)
-	IDEmptyStruct = ID(0x122)
-	IDIOReader    = ID(0x123)
-	IDIOWriter    = ID(0x124)
-	IDStatus      = ID(0x125)
-	IDUtility     = ID(0x126)
+	IDBase         = ID(0x120)
+	IDBool         = ID(0x121)
+	IDNullIOReader = ID(0x122)
+	IDNullIOWriter = ID(0x123)
+	IDEmptyStruct  = ID(0x124)
+	IDIOReader     = ID(0x125)
+	IDIOWriter     = ID(0x126)
+	IDStatus       = ID(0x127)
+	IDUtility      = ID(0x128)
 
 	IDRangeIEU32 = ID(0x130)
 	IDRangeIIU32 = ID(0x131)
@@ -768,13 +770,15 @@
 	IDU32: "u32",
 	IDU64: "u64",
 
-	IDBase:        "base",
-	IDBool:        "bool",
-	IDEmptyStruct: "empty_struct",
-	IDIOReader:    "io_reader",
-	IDIOWriter:    "io_writer",
-	IDStatus:      "status",
-	IDUtility:     "utility",
+	IDBase:         "base",
+	IDBool:         "bool",
+	IDNullIOReader: "null_io_reader",
+	IDNullIOWriter: "null_io_writer",
+	IDEmptyStruct:  "empty_struct",
+	IDIOReader:     "io_reader",
+	IDIOWriter:     "io_writer",
+	IDStatus:       "status",
+	IDUtility:      "utility",
 
 	IDRangeIEU32: "range_ie_u32",
 	IDRangeIIU32: "range_ii_u32",
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 17819b3..7c8002e 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -4456,21 +4456,6 @@
 
 // ---------------- I/O
 
-// "Null" as in "/dev/null", not as in "nullptr".
-//
-// TODO: ensure that this is zero-initialized.
-static wuffs_base__io_buffer wuffs_base__global__null_io_buffer;
-
-static inline wuffs_base__io_buffer*  //
-wuffs_base__null_io_reader() {
-  return &wuffs_base__global__null_io_buffer;
-}
-
-static inline wuffs_base__io_buffer*  //
-wuffs_base__null_io_writer() {
-  return &wuffs_base__global__null_io_buffer;
-}
-
 static inline uint64_t  //
 wuffs_base__io__count_since(uint64_t mark, uint64_t index) {
   if (index >= mark) {
@@ -10815,6 +10800,8 @@
     io2_a_src = io0_a_src + a_src->meta.wi;
   }
 
+  wuffs_base__io_buffer empty_io_buffer = wuffs_base__null_io_buffer();
+
   uint32_t coro_susp_point = self->private_impl.p_decode_id_part2[0];
   if (coro_susp_point) {
     v_block_size = self->private_data.s_decode_id_part2[0].v_block_size;
@@ -10909,8 +10896,7 @@
           {
             u_r.meta.ri = ((size_t)(iop_v_r - u_r.data.ptr));
             wuffs_base__status t_1 = wuffs_lzw__decoder__decode_io_writer(
-                &self->private_data.f_lzw,
-                wuffs_base__utility__null_io_writer(), v_r,
+                &self->private_data.f_lzw, &empty_io_buffer, v_r,
                 wuffs_base__utility__null_slice_u8());
             iop_v_r = u_r.data.ptr + u_r.meta.ri;
             v_lzw_status = t_1;