Support zlib dictionaries
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 04f91d0..8a74ba9 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -3872,10 +3872,12 @@
 
 // ---------------- Status Codes
 
+extern const char* wuffs_zlib__warning__dictionary_required;
 extern const char* wuffs_zlib__error__bad_checksum;
 extern const char* wuffs_zlib__error__bad_compression_method;
 extern const char* wuffs_zlib__error__bad_compression_window_size;
 extern const char* wuffs_zlib__error__bad_parity_check;
+extern const char* wuffs_zlib__error__incorrect_dictionary;
 
 // ---------------- Public Consts
 
@@ -3908,6 +3910,13 @@
 
 // ---------------- Public Function Prototypes
 
+WUFFS_BASE__MAYBE_STATIC uint32_t  //
+wuffs_zlib__decoder__dictionary_id(const wuffs_zlib__decoder* self);
+
+WUFFS_BASE__MAYBE_STATIC wuffs_base__empty_struct  //
+wuffs_zlib__decoder__add_dictionary(wuffs_zlib__decoder* self,
+                                    wuffs_base__slice_u8 a_dict);
+
 WUFFS_BASE__MAYBE_STATIC wuffs_base__empty_struct  //
 wuffs_zlib__decoder__set_ignore_checksum(wuffs_zlib__decoder* self, bool a_ic);
 
@@ -3943,13 +3952,20 @@
     uint32_t magic;
     uint32_t active_coroutine;
 
+    bool f_bad_call_sequence;
+    bool f_header_complete;
+    bool f_got_dictionary;
+    bool f_want_dictionary;
     bool f_ignore_checksum;
+    uint32_t f_dict_id_got;
+    uint32_t f_dict_id_want;
 
     uint32_t p_decode_io_writer[1];
   } private_impl;
 
   struct {
     wuffs_adler32__hasher f_checksum;
+    wuffs_adler32__hasher f_dict_id_hasher;
     wuffs_deflate__decoder f_flate;
 
     struct {
@@ -3990,6 +4006,16 @@
                                            wuffs_version, initialize_flags);
   }
 
+  inline uint32_t  //
+  dictionary_id() const {
+    return wuffs_zlib__decoder__dictionary_id(this);
+  }
+
+  inline wuffs_base__empty_struct  //
+  add_dictionary(wuffs_base__slice_u8 a_dict) {
+    return wuffs_zlib__decoder__add_dictionary(this, a_dict);
+  }
+
   inline wuffs_base__empty_struct  //
   set_ignore_checksum(bool a_ic) {
     return wuffs_zlib__decoder__set_ignore_checksum(this, a_ic);
@@ -11692,14 +11718,16 @@
 
 // ---------------- Status Codes Implementations
 
+const char* wuffs_zlib__warning__dictionary_required =
+    "@zlib: dictionary required";
 const char* wuffs_zlib__error__bad_checksum = "#zlib: bad checksum";
 const char* wuffs_zlib__error__bad_compression_method =
     "#zlib: bad compression method";
 const char* wuffs_zlib__error__bad_compression_window_size =
     "#zlib: bad compression window size";
 const char* wuffs_zlib__error__bad_parity_check = "#zlib: bad parity check";
-const char* wuffs_zlib__error__todo_unsupported_preset_dictionary =
-    "#zlib: TODO: unsupported preset dictionary";
+const char* wuffs_zlib__error__incorrect_dictionary =
+    "#zlib: incorrect dictionary";
 
 // ---------------- Private Consts
 
@@ -11759,6 +11787,15 @@
     }
   }
   {
+    wuffs_base__status z = wuffs_adler32__hasher__initialize(
+        &self->private_data.f_dict_id_hasher,
+        sizeof(self->private_data.f_dict_id_hasher), WUFFS_VERSION,
+        initialize_flags);
+    if (z) {
+      return z;
+    }
+  }
+  {
     wuffs_base__status z = wuffs_deflate__decoder__initialize(
         &self->private_data.f_flate, sizeof(self->private_data.f_flate),
         WUFFS_VERSION, initialize_flags);
@@ -11777,6 +11814,44 @@
 
 // ---------------- Function Implementations
 
+// -------- func zlib.decoder.dictionary_id
+
+WUFFS_BASE__MAYBE_STATIC uint32_t  //
+wuffs_zlib__decoder__dictionary_id(const wuffs_zlib__decoder* self) {
+  if (!self) {
+    return 0;
+  }
+  if ((self->private_impl.magic != WUFFS_BASE__MAGIC) &&
+      (self->private_impl.magic != WUFFS_BASE__DISABLED)) {
+    return 0;
+  }
+
+  return self->private_impl.f_dict_id_want;
+}
+
+// -------- func zlib.decoder.add_dictionary
+
+WUFFS_BASE__MAYBE_STATIC wuffs_base__empty_struct  //
+wuffs_zlib__decoder__add_dictionary(wuffs_zlib__decoder* self,
+                                    wuffs_base__slice_u8 a_dict) {
+  if (!self) {
+    return wuffs_base__make_empty_struct();
+  }
+  if (self->private_impl.magic != WUFFS_BASE__MAGIC) {
+    return wuffs_base__make_empty_struct();
+  }
+
+  if (self->private_impl.f_header_complete) {
+    self->private_impl.f_bad_call_sequence = true;
+  } else {
+    self->private_impl.f_dict_id_got = wuffs_adler32__hasher__update(
+        &self->private_data.f_dict_id_hasher, a_dict);
+    wuffs_deflate__decoder__add_history(&self->private_data.f_flate, a_dict);
+  }
+  self->private_impl.f_got_dictionary = true;
+  return wuffs_base__make_empty_struct();
+}
+
 // -------- func zlib.decoder.set_ignore_checksum
 
 WUFFS_BASE__MAYBE_STATIC wuffs_base__empty_struct  //
@@ -11871,51 +11946,101 @@
   switch (coro_susp_point) {
     WUFFS_BASE__COROUTINE_SUSPENSION_POINT_0;
 
-    {
-      WUFFS_BASE__COROUTINE_SUSPENSION_POINT(1);
-      uint16_t t_0;
-      if (WUFFS_BASE__LIKELY(io2_a_src - iop_a_src >= 2)) {
-        t_0 = wuffs_base__load_u16be(iop_a_src);
-        iop_a_src += 2;
-      } else {
-        self->private_data.s_decode_io_writer[0].scratch = 0;
-        WUFFS_BASE__COROUTINE_SUSPENSION_POINT(2);
-        while (true) {
-          if (WUFFS_BASE__UNLIKELY(iop_a_src == io2_a_src)) {
-            status = wuffs_base__suspension__short_read;
-            goto suspend;
+    if (self->private_impl.f_bad_call_sequence) {
+      status = wuffs_base__error__bad_call_sequence;
+      goto exit;
+    } else if (!self->private_impl.f_want_dictionary) {
+      {
+        WUFFS_BASE__COROUTINE_SUSPENSION_POINT(1);
+        uint16_t t_0;
+        if (WUFFS_BASE__LIKELY(io2_a_src - iop_a_src >= 2)) {
+          t_0 = wuffs_base__load_u16be(iop_a_src);
+          iop_a_src += 2;
+        } else {
+          self->private_data.s_decode_io_writer[0].scratch = 0;
+          WUFFS_BASE__COROUTINE_SUSPENSION_POINT(2);
+          while (true) {
+            if (WUFFS_BASE__UNLIKELY(iop_a_src == io2_a_src)) {
+              status = wuffs_base__suspension__short_read;
+              goto suspend;
+            }
+            uint64_t* scratch =
+                &self->private_data.s_decode_io_writer[0].scratch;
+            uint32_t num_bits_0 = ((uint32_t)(*scratch & 0xFF));
+            *scratch >>= 8;
+            *scratch <<= 8;
+            *scratch |= ((uint64_t)(*iop_a_src++)) << (56 - num_bits_0);
+            if (num_bits_0 == 8) {
+              t_0 = ((uint16_t)(*scratch >> 48));
+              break;
+            }
+            num_bits_0 += 8;
+            *scratch |= ((uint64_t)(num_bits_0));
           }
-          uint64_t* scratch = &self->private_data.s_decode_io_writer[0].scratch;
-          uint32_t num_bits_0 = ((uint32_t)(*scratch & 0xFF));
-          *scratch >>= 8;
-          *scratch <<= 8;
-          *scratch |= ((uint64_t)(*iop_a_src++)) << (56 - num_bits_0);
-          if (num_bits_0 == 8) {
-            t_0 = ((uint16_t)(*scratch >> 48));
-            break;
-          }
-          num_bits_0 += 8;
-          *scratch |= ((uint64_t)(num_bits_0));
         }
+        v_x = t_0;
       }
-      v_x = t_0;
+      if (((v_x >> 8) & 15) != 8) {
+        status = wuffs_zlib__error__bad_compression_method;
+        goto exit;
+      }
+      if ((v_x >> 12) > 7) {
+        status = wuffs_zlib__error__bad_compression_window_size;
+        goto exit;
+      }
+      if ((v_x % 31) != 0) {
+        status = wuffs_zlib__error__bad_parity_check;
+        goto exit;
+      }
+      self->private_impl.f_want_dictionary = ((v_x & 32) != 0);
+      if (self->private_impl.f_want_dictionary) {
+        self->private_impl.f_dict_id_got = 1;
+        {
+          WUFFS_BASE__COROUTINE_SUSPENSION_POINT(3);
+          uint32_t t_1;
+          if (WUFFS_BASE__LIKELY(io2_a_src - iop_a_src >= 4)) {
+            t_1 = wuffs_base__load_u32be(iop_a_src);
+            iop_a_src += 4;
+          } else {
+            self->private_data.s_decode_io_writer[0].scratch = 0;
+            WUFFS_BASE__COROUTINE_SUSPENSION_POINT(4);
+            while (true) {
+              if (WUFFS_BASE__UNLIKELY(iop_a_src == io2_a_src)) {
+                status = wuffs_base__suspension__short_read;
+                goto suspend;
+              }
+              uint64_t* scratch =
+                  &self->private_data.s_decode_io_writer[0].scratch;
+              uint32_t num_bits_1 = ((uint32_t)(*scratch & 0xFF));
+              *scratch >>= 8;
+              *scratch <<= 8;
+              *scratch |= ((uint64_t)(*iop_a_src++)) << (56 - num_bits_1);
+              if (num_bits_1 == 24) {
+                t_1 = ((uint32_t)(*scratch >> 32));
+                break;
+              }
+              num_bits_1 += 8;
+              *scratch |= ((uint64_t)(num_bits_1));
+            }
+          }
+          self->private_impl.f_dict_id_want = t_1;
+        }
+        status = wuffs_zlib__warning__dictionary_required;
+        goto ok;
+      } else if (self->private_impl.f_got_dictionary) {
+        status = wuffs_zlib__error__incorrect_dictionary;
+        goto exit;
+      }
+    } else if (self->private_impl.f_dict_id_got !=
+               self->private_impl.f_dict_id_want) {
+      if (self->private_impl.f_got_dictionary) {
+        status = wuffs_zlib__error__incorrect_dictionary;
+        goto exit;
+      }
+      status = wuffs_zlib__warning__dictionary_required;
+      goto ok;
     }
-    if (((v_x >> 8) & 15) != 8) {
-      status = wuffs_zlib__error__bad_compression_method;
-      goto exit;
-    }
-    if ((v_x >> 12) > 7) {
-      status = wuffs_zlib__error__bad_compression_window_size;
-      goto exit;
-    }
-    if ((v_x & 32) != 0) {
-      status = wuffs_zlib__error__todo_unsupported_preset_dictionary;
-      goto exit;
-    }
-    if ((v_x % 31) != 0) {
-      status = wuffs_zlib__error__bad_parity_check;
-      goto exit;
-    }
+    self->private_impl.f_header_complete = true;
     while (true) {
       v_mark = ((uint64_t)(iop_a_dst - io0_a_dst));
       {
@@ -11925,7 +12050,7 @@
         if (a_src) {
           a_src->meta.ri = ((size_t)(iop_a_src - a_src->data.ptr));
         }
-        wuffs_base__status t_1 = wuffs_deflate__decoder__decode_io_writer(
+        wuffs_base__status t_2 = wuffs_deflate__decoder__decode_io_writer(
             &self->private_data.f_flate, a_dst, a_src, a_workbuf);
         if (a_dst) {
           iop_a_dst = a_dst->data.ptr + a_dst->meta.wi;
@@ -11933,7 +12058,7 @@
         if (a_src) {
           iop_a_src = a_src->data.ptr + a_src->meta.ri;
         }
-        v_status = t_1;
+        v_status = t_2;
       }
       if (!self->private_impl.f_ignore_checksum) {
         v_checksum_got = wuffs_adler32__hasher__update(
@@ -11945,37 +12070,37 @@
         goto label_0_break;
       }
       status = v_status;
-      WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(3);
+      WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(5);
     }
   label_0_break:;
     {
-      WUFFS_BASE__COROUTINE_SUSPENSION_POINT(4);
-      uint32_t t_2;
+      WUFFS_BASE__COROUTINE_SUSPENSION_POINT(6);
+      uint32_t t_3;
       if (WUFFS_BASE__LIKELY(io2_a_src - iop_a_src >= 4)) {
-        t_2 = wuffs_base__load_u32be(iop_a_src);
+        t_3 = wuffs_base__load_u32be(iop_a_src);
         iop_a_src += 4;
       } else {
         self->private_data.s_decode_io_writer[0].scratch = 0;
-        WUFFS_BASE__COROUTINE_SUSPENSION_POINT(5);
+        WUFFS_BASE__COROUTINE_SUSPENSION_POINT(7);
         while (true) {
           if (WUFFS_BASE__UNLIKELY(iop_a_src == io2_a_src)) {
             status = wuffs_base__suspension__short_read;
             goto suspend;
           }
           uint64_t* scratch = &self->private_data.s_decode_io_writer[0].scratch;
-          uint32_t num_bits_2 = ((uint32_t)(*scratch & 0xFF));
+          uint32_t num_bits_3 = ((uint32_t)(*scratch & 0xFF));
           *scratch >>= 8;
           *scratch <<= 8;
-          *scratch |= ((uint64_t)(*iop_a_src++)) << (56 - num_bits_2);
-          if (num_bits_2 == 24) {
-            t_2 = ((uint32_t)(*scratch >> 32));
+          *scratch |= ((uint64_t)(*iop_a_src++)) << (56 - num_bits_3);
+          if (num_bits_3 == 24) {
+            t_3 = ((uint32_t)(*scratch >> 32));
             break;
           }
-          num_bits_2 += 8;
-          *scratch |= ((uint64_t)(num_bits_2));
+          num_bits_3 += 8;
+          *scratch |= ((uint64_t)(num_bits_3));
         }
       }
-      v_checksum_want = t_2;
+      v_checksum_want = t_3;
     }
     if (!self->private_impl.f_ignore_checksum &&
         (v_checksum_got != v_checksum_want)) {
diff --git a/std/zlib/decode_zlib.wuffs b/std/zlib/decode_zlib.wuffs
index fcf6c65..8170abb 100644
--- a/std/zlib/decode_zlib.wuffs
+++ b/std/zlib/decode_zlib.wuffs
@@ -15,25 +15,50 @@
 use "std/adler32"
 use "std/deflate"
 
+pub status "@dictionary required"
+
 pub status "#bad checksum"
 pub status "#bad compression method"
 pub status "#bad compression window size"
 pub status "#bad parity check"
-
-pri status "#TODO: unsupported preset dictionary"
+pub status "#incorrect dictionary"
 
 // TODO: reference deflate.decoder_workbuf_len_max_incl_worst_case.
 pub const decoder_workbuf_len_max_incl_worst_case base.u64 = 1
 
 pub struct decoder?(
+	bad_call_sequence base.bool,
+	header_complete   base.bool,
+
+	got_dictionary  base.bool,
+	want_dictionary base.bool,
+
 	ignore_checksum base.bool,
 	checksum        adler32.hasher,
 
+	dict_id_hasher adler32.hasher,
+	dict_id_got    base.u32,
+	dict_id_want   base.u32,
+
 	flate deflate.decoder,
 
 	util base.utility,
 )
 
+pub func decoder.dictionary_id() base.u32 {
+	return this.dict_id_want  // 32-bit ID read after a header with bit 0x20 set.
+}
+
+pub func decoder.add_dictionary!(dict slice base.u8) {
+	if this.header_complete {
+		this.bad_call_sequence = true  // Surfaces later as "#bad call sequence".
+	} else {
+		this.dict_id_got = this.dict_id_hasher.update!(x:args.dict)
+		this.flate.add_history!(hist:args.dict)
+	}
+	this.got_dictionary = true  // Set on both paths, including the too-late one.
+}
+
 pub func decoder.set_ignore_checksum!(ic base.bool) {
 	this.ignore_checksum = args.ic
 }
@@ -51,19 +76,35 @@
 	var checksum_want base.u32
 	var mark          base.u64
 
-	x = args.src.read_u16be?()
-	if ((x >> 8) & 0x0F) <> 0x08 {
-		return "#bad compression method"
+	if this.bad_call_sequence {
+		return base."#bad call sequence"
+	} else if not this.want_dictionary {
+		x = args.src.read_u16be?()
+		if ((x >> 8) & 0x0F) <> 0x08 {
+			return "#bad compression method"
+		}
+		if (x >> 12) > 0x07 {
+			return "#bad compression window size"
+		}
+		if (x % 31) <> 0 {
+			return "#bad parity check"
+		}
+		this.want_dictionary = (x & 0x20) <> 0
+		if this.want_dictionary {
+			this.dict_id_got = 1  // Adler-32 initial value.
+			this.dict_id_want = args.src.read_u32be?()
+			return "@dictionary required"
+		} else if this.got_dictionary {
+			return "#incorrect dictionary"
+		}
+	} else if this.dict_id_got <> this.dict_id_want {
+		if this.got_dictionary {
+			return "#incorrect dictionary"
+		}
+		return "@dictionary required"
 	}
-	if (x >> 12) > 0x07 {
-		return "#bad compression window size"
-	}
-	if (x & 0x20) <> 0 {
-		return "#TODO: unsupported preset dictionary"
-	}
-	if (x % 31) <> 0 {
-		return "#bad parity check"
-	}
+
+	this.header_complete = true
 
 	// Decode and checksum the DEFLATE-encoded payload.
 	while true {
@@ -81,4 +122,7 @@
 	if (not this.ignore_checksum) and (checksum_got <> checksum_want) {
 		return "#bad checksum"
 	}
+
+	// TODO: reset state (e.g. want_dictionary), so that we can read concat'ed
+	// zlib streams?
 }
diff --git a/test/c/std/zlib.c b/test/c/std/zlib.c
index c1b8c7e..af84d0a 100644
--- a/test/c/std/zlib.c
+++ b/test/c/std/zlib.c
@@ -227,6 +227,56 @@
                             UINT64_MAX);
 }
 
+const char* test_wuffs_zlib_decode_sheep() {
+  CHECK_FOCUS(__func__);
+  wuffs_base__io_buffer got = ((wuffs_base__io_buffer){
+      .data = global_got_slice,
+  });
+  wuffs_base__io_buffer src =
+      make_io_buffer_from_string(zlib_sheep_src_ptr, zlib_sheep_src_len);
+
+  wuffs_zlib__decoder dec;
+  const char* status = wuffs_zlib__decoder__initialize(
+      &dec, sizeof dec, WUFFS_VERSION, WUFFS_INITIALIZE__DEFAULT_OPTIONS);
+  if (status) {
+    RETURN_FAIL("initialize: %s", status);
+  }
+
+  int i;
+  for (i = 0; i < 3; i++) {  // The warning must repeat until a dict arrives.
+    status = wuffs_zlib__decoder__decode_io_writer(&dec, &got, &src,
+                                                   global_work_slice);
+
+    if (status != wuffs_zlib__warning__dictionary_required) {
+      RETURN_FAIL("decode_io_writer (before dict): got \"%s\", want \"%s\"",
+                  status, wuffs_zlib__warning__dictionary_required);
+    }
+
+    uint32_t dict_id_got = wuffs_zlib__decoder__dictionary_id(&dec);
+    uint32_t dict_id_want = 0x0BE0026E;
+    if (dict_id_got != dict_id_want) {
+      RETURN_FAIL("dictionary_id: got 0x%08" PRIX32 ", want 0x%08" PRIX32,
+                  dict_id_got, dict_id_want);
+    }
+  }
+
+  wuffs_zlib__decoder__add_dictionary(
+      &dec, ((wuffs_base__slice_u8){
+                .ptr = ((uint8_t*)(zlib_sheep_dict_ptr)),
+                .len = zlib_sheep_dict_len,
+            }));
+
+  status = wuffs_zlib__decoder__decode_io_writer(&dec, &got, &src,
+                                                 global_work_slice);
+  if (status) {
+    RETURN_FAIL("decode_io_writer (after dict): %s", status);
+  }
+
+  wuffs_base__io_buffer want =
+      make_io_buffer_from_string(zlib_sheep_want_ptr, zlib_sheep_want_len);
+  return check_io_buffers_equal("", &got, &want);
+}
+
   // ---------------- Mimic Tests
 
 #ifdef WUFFS_MIMIC
@@ -310,6 +360,7 @@
     test_wuffs_zlib_checksum_verify_good,  //
     test_wuffs_zlib_decode_midsummer,      //
     test_wuffs_zlib_decode_pi,             //
+    test_wuffs_zlib_decode_sheep,          //
 
 #ifdef WUFFS_MIMIC