Tweak JSON trailing comment/new-line quirks

commit: cd4cbc9102e7c58ecd6561d8c2d2055f76de4141 [log] [tgz]
author: Nigel Tao <nigeltao@golang.org> Tue Sep 22 22:22:15 2020 +1000
committer: Nigel Tao <nigeltao@golang.org> Tue Sep 22 23:03:49 2020 +1000
tree: a90bbba68b32fc5ce57768d6220260d9eb2c4f27
parent: 3e8cb4ebf8a0b024549e1086c7ed65d5df0fe8fa [diff]
diff --git a/example/jsonptr/jsonptr.cc b/example/jsonptr/jsonptr.cc
index 0ee259b..bd0c172 100644
--- a/example/jsonptr/jsonptr.cc
+++ b/example/jsonptr/jsonptr.cc

@@ -470,6 +470,7 @@
   in_dict_after_brace,
   in_dict_after_key,
   in_dict_after_value,
+  end_of_data,
 } g_ctx;
 
 bool  //
@@ -934,7 +935,6 @@
   if (g_flags.input_allow_comments) {
     g_dec.set_quirk_enabled(WUFFS_JSON__QUIRK_ALLOW_COMMENT_BLOCK, true);
     g_dec.set_quirk_enabled(WUFFS_JSON__QUIRK_ALLOW_COMMENT_LINE, true);
-    g_dec.set_quirk_enabled(WUFFS_JSON__QUIRK_ALLOW_TRAILING_COMMENT, true);
   }
   if (g_flags.input_allow_extra_comma) {
     g_dec.set_quirk_enabled(WUFFS_JSON__QUIRK_ALLOW_EXTRA_COMMA, true);
@@ -943,11 +943,11 @@
     g_dec.set_quirk_enabled(WUFFS_JSON__QUIRK_ALLOW_INF_NAN_NUMBERS, true);
   }
 
-  // Consume an optional whitespace trailer. This isn't part of the JSON spec,
-  // but it works better with line oriented Unix tools (such as "echo 123 |
-  // jsonptr" where it's "echo", not "echo -n") or hand-edited JSON files which
-  // can accidentally contain trailing whitespace.
-  g_dec.set_quirk_enabled(WUFFS_JSON__QUIRK_ALLOW_TRAILING_NEW_LINE, true);
+  // Consume any optional trailing whitespace and comments. This isn't part of
+  // the JSON spec, but it works better with line oriented Unix tools (such as
+  // "echo 123 | jsonptr" where it's "echo", not "echo -n") or hand-edited JSON
+  // files which can accidentally contain trailing whitespace.
+  g_dec.set_quirk_enabled(WUFFS_JSON__QUIRK_ALLOW_TRAILING_FILLER, true);
 
   return nullptr;
 }
@@ -1301,7 +1301,8 @@
                 if (g_ctx == context::in_dict_after_key) {
                   TRY(write_dst(":", 1));
                 } else if ((g_ctx != context::in_list_after_bracket) &&
-                           (g_ctx != context::in_dict_after_brace)) {
+                           (g_ctx != context::in_dict_after_brace) &&
+                           (g_ctx != context::end_of_data)) {
                   TRY(write_dst(",", 1));
                 }
                 if (!g_flags.compact_output) {
@@ -1328,14 +1329,29 @@
       start_of_token_chain = !t.continued();
       if (z == nullptr) {
         continue;
-      } else if (z == g_eod) {
-        goto end_of_data;
+      } else if (z != g_eod) {
+        return z;
+      } else if (g_flags.query_c_string && *g_flags.query_c_string) {
+        // With a non-empty g_query, don't try to consume trailing filler or
+        // confirm that we've processed all the tokens.
+        return nullptr;
       }
-      return z;
+      g_ctx = context::end_of_data;
     }
 
     if (status.repr == nullptr) {
-      return "main: internal error: unexpected end of token stream";
+      if (g_ctx != context::end_of_data) {
+        return "main: internal error: unexpected end of token stream";
+      }
+      // Check that we've exhausted the input.
+      if ((g_src.meta.ri == g_src.meta.wi) && !g_src.meta.closed) {
+        TRY(read_src());
+      }
+      if ((g_src.meta.ri < g_src.meta.wi) || !g_src.meta.closed) {
+        return "main: valid JSON followed by further (unexpected) data";
+      }
+      // All done.
+      return nullptr;
     } else if (status.repr == wuffs_base__suspension__short_read) {
       if (g_cursor_index != g_src.meta.ri) {
         return "main: internal error: inconsistent g_src indexes";
@@ -1348,34 +1364,6 @@
       return status.message();
     }
   }
-end_of_data:
-
-  // With a non-empty g_query, don't try to consume trailing whitespace or
-  // confirm that we've processed all the tokens.
-  if (g_flags.query_c_string && *g_flags.query_c_string) {
-    return nullptr;
-  }
-
-  // Check that we've exhausted the input.
-  if ((g_src.meta.ri == g_src.meta.wi) && !g_src.meta.closed) {
-    TRY(read_src());
-  }
-  if ((g_src.meta.ri < g_src.meta.wi) || !g_src.meta.closed) {
-    return "main: valid JSON followed by further (unexpected) data";
-  }
-
-  // Check that we've used all of the decoded tokens, other than trailing
-  // filler tokens. For example, "true\n" is valid JSON (and fully consumed
-  // with WUFFS_JSON__QUIRK_ALLOW_TRAILING_NEW_LINE enabled) with a trailing
-  // filler token for the "\n".
-  for (; g_tok.meta.ri < g_tok.meta.wi; g_tok.meta.ri++) {
-    if (g_tok.data.ptr[g_tok.meta.ri].value_base_category() !=
-        WUFFS_BASE__TOKEN__VBC__FILLER) {
-      return "main: internal error: decoded OK but unprocessed tokens remain";
-    }
-  }
-
-  return nullptr;
 }
 
 int  //
@@ -1441,7 +1429,7 @@
 
   const char* z = main1(argc, argv);
   if (g_wrote_to_dst) {
-    const char* z1 = write_dst("\n", 1);
+    const char* z1 = g_is_after_comment ? nullptr : write_dst("\n", 1);
     const char* z2 = flush_dst();
     z = z ? z : (z1 ? z1 : z2);
   }

diff --git a/fuzz/c/std/json_fuzzer.c b/fuzz/c/std/json_fuzzer.c
index 0a83aa5..2e1fa2c 100644
--- a/fuzz/c/std/json_fuzzer.c
+++ b/fuzz/c/std/json_fuzzer.c

@@ -240,8 +240,7 @@
       WUFFS_JSON__QUIRK_ALLOW_INF_NAN_NUMBERS,
       WUFFS_JSON__QUIRK_ALLOW_LEADING_ASCII_RECORD_SEPARATOR,
       WUFFS_JSON__QUIRK_ALLOW_LEADING_UNICODE_BYTE_ORDER_MARK,
-      WUFFS_JSON__QUIRK_ALLOW_TRAILING_COMMENT,
-      WUFFS_JSON__QUIRK_ALLOW_TRAILING_NEW_LINE,
+      WUFFS_JSON__QUIRK_ALLOW_TRAILING_FILLER,
       WUFFS_JSON__QUIRK_REPLACE_INVALID_UNICODE,
       0,
   };

diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 3f4ad79..855abda 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c

@@ -7661,8 +7661,12 @@
 
 #define WUFFS_JSON__QUIRK_ALLOW_LEADING_UNICODE_BYTE_ORDER_MARK 1225364496
 
+#define WUFFS_JSON__QUIRK_ALLOW_TRAILING_FILLER 1225364497
+
 #define WUFFS_JSON__QUIRK_ALLOW_TRAILING_COMMENT 1225364497
 
+#define WUFFS_JSON__QUIRK_EXPECT_TRAILING_NEW_LINE_OR_EOF 1225364498
+
 #define WUFFS_JSON__QUIRK_ALLOW_TRAILING_NEW_LINE 1225364498
 
 #define WUFFS_JSON__QUIRK_JSON_POINTER_ALLOW_TILDE_R_TILDE_N 1225364499
@@ -7760,15 +7764,15 @@
     bool f_quirks[21];
     bool f_allow_leading_ars;
     bool f_allow_leading_ubom;
-    bool f_allow_trailing_comment;
     bool f_end_of_data;
+    uint8_t f_trailer_stop;
     uint8_t f_comment_type;
 
     uint32_t p_decode_tokens[1];
     uint32_t p_decode_leading[1];
     uint32_t p_decode_comment[1];
     uint32_t p_decode_inf_nan[1];
-    uint32_t p_decode_trailing_new_line[1];
+    uint32_t p_decode_trailer[1];
   } private_impl;
 
   struct {
@@ -25737,7 +25741,7 @@
     wuffs_base__io_buffer* a_src);
 
 static wuffs_base__status
-wuffs_json__decoder__decode_trailing_new_line(
+wuffs_json__decoder__decode_trailer(
     wuffs_json__decoder* self,
     wuffs_base__token_buffer* a_dst,
     wuffs_base__io_buffer* a_src);
@@ -25960,6 +25964,12 @@
       status = wuffs_base__make_status(wuffs_base__note__end_of_data);
       goto ok;
     }
+    if (self->private_impl.f_quirks[18]) {
+      if (self->private_impl.f_quirks[11] || self->private_impl.f_quirks[12] || self->private_impl.f_quirks[17]) {
+        status = wuffs_base__make_status(wuffs_json__error__bad_quirk_combination);
+        goto exit;
+      }
+    }
     if (self->private_impl.f_quirks[15] || self->private_impl.f_quirks[16]) {
       if (a_dst) {
         a_dst->meta.wi = ((size_t)(iop_a_dst - a_dst->data.ptr));
@@ -26832,7 +26842,9 @@
             if (status.repr) {
               goto suspend;
             }
-            goto label__outer__continue;
+            if (self->private_impl.f_comment_type > 0) {
+              goto label__outer__continue;
+            }
           }
         }
         status = wuffs_base__make_status(wuffs_json__error__bad_input);
@@ -26845,7 +26857,7 @@
       v_expect = v_expect_after_value;
     }
     label__outer__break:;
-    if (self->private_impl.f_quirks[18]) {
+    if (self->private_impl.f_quirks[17] || self->private_impl.f_quirks[18]) {
       if (a_dst) {
         a_dst->meta.wi = ((size_t)(iop_a_dst - a_dst->data.ptr));
       }
@@ -26853,7 +26865,7 @@
         a_src->meta.ri = ((size_t)(iop_a_src - a_src->data.ptr));
       }
       WUFFS_BASE__COROUTINE_SUSPENSION_POINT(24);
-      status = wuffs_json__decoder__decode_trailing_new_line(self, a_dst, a_src);
+      status = wuffs_json__decoder__decode_trailer(self, a_dst, a_src);
       if (a_dst) {
         iop_a_dst = a_dst->data.ptr + a_dst->meta.wi;
       }
@@ -27240,8 +27252,8 @@
         goto label__0__continue;
       }
       if (a_src && a_src->meta.closed) {
-        status = wuffs_base__make_status(wuffs_json__error__bad_input);
-        goto exit;
+        status = wuffs_base__make_status(NULL);
+        goto ok;
       }
       status = wuffs_base__make_status(wuffs_base__suspension__short_read);
       WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(2);
@@ -27348,8 +27360,6 @@
         }
       }
     }
-    status = wuffs_base__make_status(wuffs_json__error__bad_input);
-    goto exit;
 
     goto ok;
     ok:
@@ -27535,10 +27545,10 @@
   return status;
 }
 
-// -------- func json.decoder.decode_trailing_new_line
+// -------- func json.decoder.decode_trailer
 
 static wuffs_base__status
-wuffs_json__decoder__decode_trailing_new_line(
+wuffs_json__decoder__decode_trailer(
     wuffs_json__decoder* self,
     wuffs_base__token_buffer* a_dst,
     wuffs_base__io_buffer* a_src) {
@@ -27571,11 +27581,15 @@
     io2_a_src = io0_a_src + a_src->meta.wi;
   }
 
-  uint32_t coro_susp_point = self->private_impl.p_decode_trailing_new_line[0];
+  uint32_t coro_susp_point = self->private_impl.p_decode_trailer[0];
   switch (coro_susp_point) {
     WUFFS_BASE__COROUTINE_SUSPENSION_POINT_0;
 
-    self->private_impl.f_allow_trailing_comment = self->private_impl.f_quirks[17];
+    if (self->private_impl.f_quirks[18]) {
+      self->private_impl.f_trailer_stop = 10;
+    } else {
+      self->private_impl.f_trailer_stop = 0;
+    }
     label__outer__continue:;
     while (true) {
       if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 0) {
@@ -27608,44 +27622,44 @@
                 (((uint64_t)(v_whitespace_length)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
             v_whitespace_length = 0;
           }
-          if (self->private_impl.f_allow_trailing_comment) {
-            if (a_dst) {
-              a_dst->meta.wi = ((size_t)(iop_a_dst - a_dst->data.ptr));
-            }
-            if (a_src) {
-              a_src->meta.ri = ((size_t)(iop_a_src - a_src->data.ptr));
-            }
-            WUFFS_BASE__COROUTINE_SUSPENSION_POINT(3);
-            status = wuffs_json__decoder__decode_comment(self, a_dst, a_src);
-            if (a_dst) {
-              iop_a_dst = a_dst->data.ptr + a_dst->meta.wi;
-            }
-            if (a_src) {
-              iop_a_src = a_src->data.ptr + a_src->meta.ri;
-            }
-            if (status.repr) {
-              goto suspend;
-            }
-            v_c = 0;
-            v_whitespace_length = 0;
-            if (self->private_impl.f_comment_type == 1) {
-              self->private_impl.f_allow_trailing_comment = false;
-              goto label__outer__continue;
-            } else if (self->private_impl.f_comment_type == 2) {
-              goto label__outer__break;
-            }
+          if (self->private_impl.f_trailer_stop > 0) {
+            status = wuffs_base__make_status(wuffs_json__error__bad_input);
+            goto exit;
           }
-          status = wuffs_base__make_status(wuffs_json__error__bad_input);
-          goto exit;
+          if (a_dst) {
+            a_dst->meta.wi = ((size_t)(iop_a_dst - a_dst->data.ptr));
+          }
+          if (a_src) {
+            a_src->meta.ri = ((size_t)(iop_a_src - a_src->data.ptr));
+          }
+          WUFFS_BASE__COROUTINE_SUSPENSION_POINT(3);
+          status = wuffs_json__decoder__decode_comment(self, a_dst, a_src);
+          if (a_dst) {
+            iop_a_dst = a_dst->data.ptr + a_dst->meta.wi;
+          }
+          if (a_src) {
+            iop_a_src = a_src->data.ptr + a_src->meta.ri;
+          }
+          if (status.repr) {
+            goto suspend;
+          }
+          v_c = 0;
+          v_whitespace_length = 0;
+          if (self->private_impl.f_comment_type > 0) {
+            goto label__outer__continue;
+          }
+          status = wuffs_base__make_status(NULL);
+          goto ok;
         }
         (iop_a_src += 1, wuffs_base__make_empty_struct());
-        if ((v_whitespace_length >= 65534) || (v_c == 10)) {
+        if ((v_whitespace_length >= 65534) || (v_c == self->private_impl.f_trailer_stop)) {
           *iop_a_dst++ = wuffs_base__make_token(
               (((uint64_t)(0)) << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
               (((uint64_t)((v_whitespace_length + 1))) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
           v_whitespace_length = 0;
-          if (v_c == 10) {
-            goto label__outer__break;
+          if (v_c == self->private_impl.f_trailer_stop) {
+            status = wuffs_base__make_status(NULL);
+            goto ok;
           }
           goto label__outer__continue;
         }
@@ -27656,13 +27670,13 @@
 
     goto ok;
     ok:
-    self->private_impl.p_decode_trailing_new_line[0] = 0;
+    self->private_impl.p_decode_trailer[0] = 0;
     goto exit;
   }
 
   goto suspend;
   suspend:
-  self->private_impl.p_decode_trailing_new_line[0] = wuffs_base__status__is_suspension(&status) ? coro_susp_point : 0;
+  self->private_impl.p_decode_trailer[0] = wuffs_base__status__is_suspension(&status) ? coro_susp_point : 0;
 
   goto exit;
   exit:

diff --git a/script/print-json-token-debug-format.c b/script/print-json-token-debug-format.c
index 29342c5..b92a3a9 100644
--- a/script/print-json-token-debug-format.c
+++ b/script/print-json-token-debug-format.c

@@ -298,7 +298,7 @@
         WUFFS_JSON__QUIRK_ALLOW_INF_NAN_NUMBERS,
         WUFFS_JSON__QUIRK_ALLOW_LEADING_ASCII_RECORD_SEPARATOR,
         WUFFS_JSON__QUIRK_ALLOW_LEADING_UNICODE_BYTE_ORDER_MARK,
-        WUFFS_JSON__QUIRK_ALLOW_TRAILING_NEW_LINE,
+        WUFFS_JSON__QUIRK_ALLOW_TRAILING_FILLER,
         WUFFS_JSON__QUIRK_REPLACE_INVALID_UNICODE,
         0,
     };

diff --git a/std/json/decode_json.wuffs b/std/json/decode_json.wuffs
index df48da1..bd83ae8 100644
--- a/std/json/decode_json.wuffs
+++ b/std/json/decode_json.wuffs

@@ -15,14 +15,15 @@
 pub struct decoder? implements base.token_decoder(
 	quirks : array[QUIRKS_COUNT] base.bool,
 
-	allow_leading_ars      : base.bool,
-	allow_leading_ubom     : base.bool,
-	allow_trailing_comment : base.bool,
+	allow_leading_ars  : base.bool,
+	allow_leading_ubom : base.bool,
 
 	end_of_data : base.bool,
 
+	trailer_stop : base.u8,
+
 	// comment_type is set as a side-effect of decode_comment?.
-	//  - 0 means failure.
+	//  - 0 means no comment.
 	//  - 1 means a block comment.
 	//  - 2 means a line  comment.
 	comment_type : base.u8,
@@ -123,6 +124,14 @@
 		return base."@end of data"
 	}
 
+	if this.quirks[QUIRK_EXPECT_TRAILING_NEW_LINE_OR_EOF - QUIRKS_BASE] {
+		if this.quirks[QUIRK_ALLOW_COMMENT_BLOCK - QUIRKS_BASE] or
+			this.quirks[QUIRK_ALLOW_COMMENT_LINE - QUIRKS_BASE] or
+			this.quirks[QUIRK_ALLOW_TRAILING_FILLER - QUIRKS_BASE] {
+			return "#bad quirk combination"
+		}
+	}
+
 	if this.quirks[QUIRK_ALLOW_LEADING_ASCII_RECORD_SEPARATOR - QUIRKS_BASE] or
 		this.quirks[QUIRK_ALLOW_LEADING_UNICODE_BYTE_ORDER_MARK - QUIRKS_BASE] {
 		this.decode_leading?(dst: args.dst, src: args.src)
@@ -1176,7 +1185,9 @@
 			if this.quirks[QUIRK_ALLOW_COMMENT_BLOCK - QUIRKS_BASE] or
 				this.quirks[QUIRK_ALLOW_COMMENT_LINE - QUIRKS_BASE] {
 				this.decode_comment?(dst: args.dst, src: args.src)
-				continue.outer
+				if this.comment_type > 0 {
+					continue.outer
+				}
 			}
 		}
 
@@ -1191,8 +1202,9 @@
 		expect = expect_after_value
 	} endwhile.outer
 
-	if this.quirks[QUIRK_ALLOW_TRAILING_NEW_LINE - QUIRKS_BASE] {
-		this.decode_trailing_new_line?(dst: args.dst, src: args.src)
+	if this.quirks[QUIRK_ALLOW_TRAILING_FILLER - QUIRKS_BASE] or
+		this.quirks[QUIRK_EXPECT_TRAILING_NEW_LINE_OR_EOF - QUIRKS_BASE] {
+		this.decode_trailer?(dst: args.dst, src: args.src)
 	}
 
 	this.end_of_data = true
@@ -1438,7 +1450,7 @@
 			continue
 		}
 		if args.src.is_closed() {
-			return "#bad input"
+			return ok
 		}
 		yield? base."$short read"
 	} endwhile
@@ -1562,8 +1574,6 @@
 			} endwhile
 		} endwhile.comment_line
 	}
-
-	return "#bad input"
 }
 
 pri func decoder.decode_inf_nan?(dst: base.token_writer, src: base.io_reader) {
@@ -1677,11 +1687,15 @@
 	} endwhile
 }
 
-pri func decoder.decode_trailing_new_line?(dst: base.token_writer, src: base.io_reader) {
+pri func decoder.decode_trailer?(dst: base.token_writer, src: base.io_reader) {
 	var c                 : base.u8
 	var whitespace_length : base.u32[..= 0xFFFE]
 
-	this.allow_trailing_comment = this.quirks[QUIRK_ALLOW_TRAILING_COMMENT - QUIRKS_BASE]
+	if this.quirks[QUIRK_EXPECT_TRAILING_NEW_LINE_OR_EOF - QUIRKS_BASE] {
+		this.trailer_stop = '\n'
+	} else {
+		this.trailer_stop = 0
+	}
 
 	while.outer true {
 		if args.dst.length() <= 0 {
@@ -1714,27 +1728,25 @@
 						value_major: 0, value_minor: 0, continued: 0, length: whitespace_length)
 					whitespace_length = 0
 				}
-				if this.allow_trailing_comment {
-					this.decode_comment?(dst: args.dst, src: args.src)
-					c = 0
-					whitespace_length = 0
-					if this.comment_type == 1 {  // Block comment.
-						this.allow_trailing_comment = false
-						continue.outer
-					} else if this.comment_type == 2 {  // Line comment.
-						break.outer
-					}
+				if this.trailer_stop > 0 {
+					return "#bad input"
 				}
-				return "#bad input"
+				this.decode_comment?(dst: args.dst, src: args.src)
+				c = 0
+				whitespace_length = 0
+				if this.comment_type > 0 {
+					continue.outer
+				}
+				return ok
 			}
 
 			args.src.skip_u32_fast!(actual: 1, worst_case: 1)
-			if (whitespace_length >= 0xFFFE) or (c == '\n') {
+			if (whitespace_length >= 0xFFFE) or (c == this.trailer_stop) {
 				args.dst.write_simple_token_fast!(
 					value_major: 0, value_minor: 0, continued: 0, length: whitespace_length + 1)
 				whitespace_length = 0
-				if c == '\n' {
-					break.outer
+				if c == this.trailer_stop {
+					return ok
 				}
 				continue.outer
 			}

diff --git a/std/json/decode_quirks.wuffs b/std/json/decode_quirks.wuffs
index 5f31b5b..9970c35 100644
--- a/std/json/decode_quirks.wuffs
+++ b/std/json/decode_quirks.wuffs

@@ -97,16 +97,17 @@
 pub const QUIRK_ALLOW_BACKSLASH_ZERO : base.u32 = 0x4909_9400 | 0x0A
 
 // When this quirk is enabled, "/* C/C++ style block comments */" are accepted
-// anywhere whitespace would be, although see the QUIRK_ALLOW_TRAILING_COMMENT
-// comment for additional interaction when combining multiple quirks.
+// anywhere whitespace would be. See also QUIRK_ALLOW_TRAILING_FILLER.
 //
 // They produce WUFFS_BASE__TOKEN__VBD__FILLER__COMMENT_BLOCK tokens. The token
 // chain's source bytes includes the starting "/*" and the ending "*/".
+//
+// To avoid ambiguity (as comments can contain new lines), this quirk cannot be
+// combined with QUIRK_EXPECT_TRAILING_NEW_LINE_OR_EOF.
 pub const QUIRK_ALLOW_COMMENT_BLOCK : base.u32 = 0x4909_9400 | 0x0B
 
 // When this quirk is enabled, "// C/C++ style line comments\n" are accepted
-// anywhere whitespace would be, although see the QUIRK_ALLOW_TRAILING_COMMENT
-// comment for additional interaction when combining multiple quirks.
+// anywhere whitespace would be. See also QUIRK_ALLOW_TRAILING_FILLER.
 //
 // A line comment may not omit the ending "\n", even if there is no input
 // afterwards (i.e. the prospective line comment ends with the end-of-file).
@@ -117,6 +118,9 @@
 // Even if the line comments are on consecutive lines, each line comment is a
 // separate token chain. There may be whitespace tokens between one line
 // comment's ending "\n" and the next one's starting "//".
+//
+// To avoid ambiguity (as comments can contain new lines), this quirk cannot be
+// combined with QUIRK_EXPECT_TRAILING_NEW_LINE_OR_EOF.
 pub const QUIRK_ALLOW_COMMENT_LINE : base.u32 = 0x4909_9400 | 0x0C
 
 // When this quirk is enabled, there may be a comma after the final array
@@ -140,8 +144,9 @@
 // When combined with QUIRK_ALLOW_LEADING_UNICODE_BYTE_ORDER_MARK, either mark
 // may come first in the byte stream.
 //
-// When combined with QUIRK_ALLOW_TRAILING_NEW_LINE, this format is also known
-// as RFC 7464, Json Text Sequences and MIME type "application/json-seq".
+// When combined with QUIRK_EXPECT_TRAILING_NEW_LINE_OR_EOF, this format is
+// also known as RFC 7464, JSON Text Sequences and MIME type
+// "application/json-seq".
 pub const QUIRK_ALLOW_LEADING_ASCII_RECORD_SEPARATOR : base.u32 = 0x4909_9400 | 0x0F
 
 // When this quirk is enabled, the input byte stream may optionally start with
@@ -152,40 +157,43 @@
 // may come first in the byte stream.
 pub const QUIRK_ALLOW_LEADING_UNICODE_BYTE_ORDER_MARK : base.u32 = 0x4909_9400 | 0x10
 
-// When this quirk is enabled and both:
-//  - QUIRK_ALLOW_TRAILING_NEW_LINE is enabled,
-//  - at least one of QUIRK_ALLOW_COMMENT_ETC is enabled,
-// the trailing whitespace may optionally contain a single comment. As per
-// QUIRK_ALLOW_TRAILING_NEW_LINE, processing will still stop at the first
-// trailing '\n' (outside of a block comment), even if more comments followed.
+// When this quirk is enabled, following a successful decoding of a top-level
+// JSON value, any trailing whitespace (ASCII characters 0x09, 0x0A, 0x0D or
+// 0x20) and/or comments (if QUIRK_ALLOW_COMMENT_ETC is enabled) are also
+// consumed (and WUFFS_BASE__TOKEN__VBC__FILLER tokens emitted) up to but
+// excluding the end-of-file or the next non-filler byte.
 //
-// For a trailing block comment, new lines within the comment are not counted
-// and after the comment concludes, the decoder will continue consuming
-// whitespace up to and including the next '\n' (or end-of-file).
+// Trailing non-filler is not an error. Decoding simply stops before it.
 //
-// For a trailing line comment, the decoder stops immediately after the
-// comment. If not stopped by end-of-file, this stops after the '\n' that
-// concludes (and is part of) the comment. One implication is that if multiple
-// line comments trail a JSON value, only the first one will be processed.
+// To avoid ambiguity, this quirk cannot be combined with
+// QUIRK_EXPECT_TRAILING_NEW_LINE_OR_EOF. Unlike that quirk, enabling this
+// quirk will consume multiple trailing '\n' bytes.
+pub const QUIRK_ALLOW_TRAILING_FILLER : base.u32 = 0x4909_9400 | 0x11
+
+// Deprecated: equivalent to QUIRK_ALLOW_TRAILING_FILLER.
 pub const QUIRK_ALLOW_TRAILING_COMMENT : base.u32 = 0x4909_9400 | 0x11
 
 // When this quirk is enabled, following a successful decoding of a top-level
 // JSON value, any trailing whitespace (ASCII characters 0x09, 0x0A, 0x0D or
 // 0x20) is also consumed (and WUFFS_BASE__TOKEN__VBC__FILLER tokens emitted)
-// up to the end-of-file or up to and including a single new line (ASCII 0x0A,
-// also known as '\n'), whichever comes first. This trailing whitespace is not
-// mandatory, but it is consumed if present.
+// up to but excluding the end-of-file or up to and including a single new line
+// (ASCII 0x0A, also known as '\n'), whichever comes first. A trailing '\n' is
+// not mandatory (if at end-of-file), but it is consumed if present and will
+// stop decoding.
 //
-// When enabled, trailing non-whitespace (before a '\n') is an error (unless it
-// is a comment and QUIRK_ALLOW_TRAILING_COMMENT is enabled; see above). For
-// example, with "007" input, decoding with this quirk disabled (the default
-// case) will consume just 1 byte ("0") and leave the rest ("07") unread
-// without error, as "0" is a perfectly valid JSON value (but "00" is not).
-// Decoding "007" (or "007\n") with this quirk enabled will return an error.
+// Trailing non-whitespace after a trailing '\n' is ignored.
 //
-// When enabled, the decoder will not consume more than one trailing '\n'
-// (outside of a block comment), nor will it consume any other whitespace
-// immediately after a trailing '\n'.
+// Trailing non-whitespace, before EOF or '\n', is an error. For example, with
+// "007" input, decoding with this quirk disabled (the default case) will
+// consume just 1 byte ("0") and leave the rest ("07") unread without error, as
+// "0" is a perfectly valid JSON value (but "00" is not). Decoding "007" (or
+// "007\n") with this quirk enabled will return an error.
+//
+// To avoid ambiguity (as comments can contain new lines), this quirk cannot be
+// combined with any of:
+//  - QUIRK_ALLOW_COMMENT_BLOCK
+//  - QUIRK_ALLOW_COMMENT_LINE
+//  - QUIRK_ALLOW_TRAILING_FILLER
 //
 // If a JSON encoder avoids emitting (optional) '\n' bytes, other than a single
 // '\n' after each top-level value in a multi-JSON-value stream, this format is
@@ -199,13 +207,9 @@
 // When combined with QUIRK_ALLOW_LEADING_ASCII_RECORD_SEPARATOR, this format
 // is also known as RFC 7464, Json Text Sequences and MIME type
 // "application/json-seq".
-//
-// When combined with QUIRK_ALLOW_COMMENT_BLOCK or QUIRK_ALLOW_COMMENT_LINE, it
-// is an error for a comment to occur in this trailing whitespace, unless
-// QUIRK_ALLOW_TRAILING_COMMENT is also enabled. Be aware that block comments
-// can contain multiple new lines, so combining such quirks can break the
-// "exactly one JSON value per line" assumption for newline-delimited but
-// otherwise compact (minified) JSON.
+pub const QUIRK_EXPECT_TRAILING_NEW_LINE_OR_EOF : base.u32 = 0x4909_9400 | 0x12
+
+// Deprecated: equivalent to QUIRK_EXPECT_TRAILING_NEW_LINE_OR_EOF.
 pub const QUIRK_ALLOW_TRAILING_NEW_LINE : base.u32 = 0x4909_9400 | 0x12
 
 // When this quirk is enabled, JSON Pointer strings containing "~r" or "~n",

diff --git a/test/c/std/json.c b/test/c/std/json.c
index 413e586..878ca55 100644
--- a/test/c/std/json.c
+++ b/test/c/std/json.c

@@ -2537,7 +2537,7 @@
         WUFFS_JSON__QUIRK_ALLOW_INF_NAN_NUMBERS,
         WUFFS_JSON__QUIRK_ALLOW_LEADING_ASCII_RECORD_SEPARATOR,
         WUFFS_JSON__QUIRK_ALLOW_LEADING_UNICODE_BYTE_ORDER_MARK,
-        WUFFS_JSON__QUIRK_ALLOW_TRAILING_NEW_LINE,
+        WUFFS_JSON__QUIRK_ALLOW_TRAILING_FILLER,
         WUFFS_JSON__QUIRK_REPLACE_INVALID_UNICODE,
         0,
     };
@@ -3386,24 +3386,51 @@
 }
 
 const char*  //
-test_wuffs_json_decode_quirk_allow_trailing_comment() {
+test_wuffs_json_decode_quirk_allow_trailing_comments() {
   CHECK_FOCUS(__func__);
 
-  // These test cases all end with two '\n' bytes. If the first byte is '8'
-  // then decoding should succeed, and stop between those two '\n' bytes.
-  // Otherwise, decoding should fail.
+  // The first byte is a code.
+  //  - '1' means that there are zero or one '\n' bytes
+  //  - '2' means that there are two '\n' bytes but no comments
+  //  - '3' means that there are comments
+  //  - '4' means that there is non-filler after  eof-or-'\n'
+  //  - '5' means that there is non-filler before eof-or-'\n'
+  //
+  // WUFFS_JSON__QUIRK_ALLOW_TRAILING_FILLER (together with
+  // WUFFS_JSON__QUIRK_ALLOW_COMMENT_ETC) should decode the '1's, '2's and '3's
+  // completely and the '4's and '5's up to but excluding the non-filler.
+  //
+  // WUFFS_JSON__QUIRK_EXPECT_TRAILING_NEW_LINE_OR_EOF should decode the '1's
+  // completely and the '2's and '4's just after the first '\n'.
   const char* test_cases[] = {
-      "80\n\n",                //
-      "81 \n\n",               //
-      "82 /*foo*/ \n\n",       //
-      "83/*bar\nbaz*/\n\n",    //
-      "84 // qux\n\n",         //
-      "95 /*c0*/ /*c1*/\n\n",  //
-      "96 /*c0*/ // c2 \n\n",  //
+      "100",                         //
+      "101 \n",                      //
+      "202\n\n",                     //
+      "203 \n\n",                    //
+      "304 /*foo*/",                 //
+      "305 /*foo*/ ",                //
+      "306 /*foo*/ \n",              //
+      "307 /*foo*/ \n\n",            //
+      "308/*bar\nbaz*/\n\n",         //
+      "309 // qux\n\n",              //
+      "310 /*c0*/ /*c1*/\n\n",       //
+      "311 /*c0*/ \n\n // c2 \n\n",  //
+      "412 \n9",                     //
+      "513 9",                       //
   };
 
   int tc;
+
+  // Test ALLOW_ETC.
   for (tc = 0; tc < WUFFS_TESTLIB_ARRAY_SIZE(test_cases); tc++) {
+    void* tc_ptr = (void*)(test_cases[tc]);
+    size_t tc_len = strlen(test_cases[tc]);
+    char code = test_cases[tc][0];
+
+    wuffs_base__token_buffer tok =
+        wuffs_base__slice_token__writer(g_have_slice_token);
+    wuffs_base__io_buffer src =
+        wuffs_base__ptr_u8__reader(tc_ptr, tc_len, true);
     wuffs_json__decoder dec;
     CHECK_STATUS("initialize", wuffs_json__decoder__initialize(
                                    &dec, sizeof dec, WUFFS_VERSION,
@@ -3413,81 +3440,119 @@
     wuffs_json__decoder__set_quirk_enabled(
         &dec, WUFFS_JSON__QUIRK_ALLOW_COMMENT_LINE, true);
     wuffs_json__decoder__set_quirk_enabled(
-        &dec, WUFFS_JSON__QUIRK_ALLOW_TRAILING_COMMENT, true);
-    wuffs_json__decoder__set_quirk_enabled(
-        &dec, WUFFS_JSON__QUIRK_ALLOW_TRAILING_NEW_LINE, true);
+        &dec, WUFFS_JSON__QUIRK_ALLOW_TRAILING_FILLER, true);
 
+    const char* have_repr =
+        wuffs_json__decoder__decode_tokens(&dec, &tok, &src, g_work_slice_u8)
+            .repr;
+    if (have_repr != NULL) {
+      RETURN_FAIL("tc=%d, ALLOW_ETC: decode_tokens: have \"%s\", want NULL", tc,
+                  have_repr);
+    }
+
+    size_t have_total_length = 0;
+    while (tok.meta.ri < tok.meta.wi) {
+      have_total_length +=
+          wuffs_base__token__length(&tok.data.ptr[tok.meta.ri++]);
+    }
+    size_t want_total_length = tc_len - ((code >= '4') ? 1 : 0);
+    if (have_total_length != src.meta.ri) {
+      RETURN_FAIL("tc=%d, ALLOW_ETC: total_length: have %zu, want %zu", tc,
+                  have_total_length, src.meta.ri);
+    } else if (have_total_length != want_total_length) {
+      RETURN_FAIL("tc=%d, ALLOW_ETC: total_length: have %zu, want %zu", tc,
+                  have_total_length, want_total_length);
+    }
+  }
+
+  // Test EXPECT_ETC.
+  for (tc = 0; tc < WUFFS_TESTLIB_ARRAY_SIZE(test_cases); tc++) {
     void* tc_ptr = (void*)(test_cases[tc]);
     size_t tc_len = strlen(test_cases[tc]);
+    char code = test_cases[tc][0];
+
     wuffs_base__token_buffer tok =
         wuffs_base__slice_token__writer(g_have_slice_token);
     wuffs_base__io_buffer src =
         wuffs_base__ptr_u8__reader(tc_ptr, tc_len, true);
-    const char* have =
+    wuffs_json__decoder dec;
+    CHECK_STATUS("initialize", wuffs_json__decoder__initialize(
+                                   &dec, sizeof dec, WUFFS_VERSION,
+                                   WUFFS_INITIALIZE__DEFAULT_OPTIONS));
+    wuffs_json__decoder__set_quirk_enabled(
+        &dec, WUFFS_JSON__QUIRK_EXPECT_TRAILING_NEW_LINE_OR_EOF, true);
+
+    const char* have_repr =
         wuffs_json__decoder__decode_tokens(&dec, &tok, &src, g_work_slice_u8)
             .repr;
-    const char* want =
-        (test_cases[tc][0] == '8') ? NULL : wuffs_json__error__bad_input;
-    if (have != want) {
-      RETURN_FAIL("tc=%d: decode_tokens: have \"%s\", want \"%s\"", tc, have,
-                  want);
-    } else if (have != NULL) {
+    const char* want_repr =
+        ((code == '3') || (code == '5')) ? wuffs_json__error__bad_input : NULL;
+    if (have_repr != want_repr) {
+      RETURN_FAIL("tc=%d, EXPECT_ETC: decode_tokens: have \"%s\", want \"%s\"",
+                  tc, have_repr, want_repr);
+    } else if (have_repr != NULL) {
       continue;
     }
 
-    size_t total_length = 0;
+    size_t have_total_length = 0;
     while (tok.meta.ri < tok.meta.wi) {
-      total_length += wuffs_base__token__length(&tok.data.ptr[tok.meta.ri++]);
+      have_total_length +=
+          wuffs_base__token__length(&tok.data.ptr[tok.meta.ri++]);
     }
-    if (total_length != src.meta.ri) {
-      RETURN_FAIL("tc=%d: total_length: have %zu, want %zu", tc, total_length,
-                  src.meta.ri);
-    } else if ((total_length + 1) != tc_len) {
-      RETURN_FAIL("tc=%d: total_length+1: have %zu, want %zu", tc,
-                  total_length + 1, tc_len);
+    size_t want_total_length = tc_len - ((code == '1') ? 0 : 1);
+    if (have_total_length != src.meta.ri) {
+      RETURN_FAIL("tc=%d, EXPECT_ETC: total_length: have %zu, want %zu", tc,
+                  have_total_length, src.meta.ri);
+    } else if (have_total_length != want_total_length) {
+      RETURN_FAIL("tc=%d, EXPECT_ETC: total_length: have %zu, want %zu", tc,
+                  have_total_length, want_total_length);
     }
   }
+
   return NULL;
 }
 
 const char*  //
-test_wuffs_json_decode_quirk_allow_trailing_new_line() {
+test_wuffs_json_decode_quirk_allow_trailing_filler() {
   CHECK_FOCUS(__func__);
 
   struct {
-    // want has 2 bytes, one for each possible q:
-    //  - q&1 sets WUFFS_JSON__QUIRK_ALLOW_TRAILING_NEW_LINE.
+    // want has 3 bytes, one for each possible q:
+    //  - q&1 sets WUFFS_JSON__QUIRK_ALLOW_TRAILING_FILLER.
+    //  - q&2 sets WUFFS_JSON__QUIRK_EXPECT_TRAILING_NEW_LINE_OR_EOF.
     // An 'X', '+' or '-' means that decoding should succeed (and consume the
     // entire input), succeed (without consuming the entire input) or fail.
     const char* want;
     const char* str;
   } test_cases[] = {
-      {.want = "++", .str = "0 \n "},      //
-      {.want = "++", .str = "0 \n\n"},     //
-      {.want = "++", .str = "0\n\n"},      //
-      {.want = "+-", .str = "0 true \n"},  //
-      {.want = "+-", .str = "007"},        //
-      {.want = "+-", .str = "007\n"},      //
-      {.want = "+-", .str = "0true "},     //
-      {.want = "+-", .str = "0true"},      //
-      {.want = "+X", .str = "0 "},         //
-      {.want = "+X", .str = "0 \n"},       //
-      {.want = "+X", .str = "0\n"},        //
-      {.want = "+X", .str = "0\t\r\n"},    //
-      {.want = "--", .str = "\n"},         //
-      {.want = "XX", .str = "0"},          //
+      {.want = "+X+", .str = "0 \n "},      //
+      {.want = "+X+", .str = "0 \n\n"},     //
+      {.want = "+X+", .str = "0\n\n"},      //
+      {.want = "++-", .str = "0 true \n"},  //
+      {.want = "++-", .str = "007"},        //
+      {.want = "++-", .str = "007\n"},      //
+      {.want = "++-", .str = "0true "},     //
+      {.want = "++-", .str = "0true"},      //
+      {.want = "+XX", .str = "0 "},         //
+      {.want = "+XX", .str = "0 \n"},       //
+      {.want = "+XX", .str = "0\n"},        //
+      {.want = "+XX", .str = "0\t\r\n"},    //
+      {.want = "---", .str = "\n"},         //
+      {.want = "XXX", .str = "0"},          //
   };
 
   int tc;
   for (tc = 0; tc < WUFFS_TESTLIB_ARRAY_SIZE(test_cases); tc++) {
     int q;
-    for (q = 0; q < 2; q++) {
+    for (q = 0; q < 3; q++) {
       wuffs_json__decoder dec;
       CHECK_STATUS("initialize", wuffs_json__decoder__initialize(
                                      &dec, sizeof dec, WUFFS_VERSION,
                                      WUFFS_INITIALIZE__DEFAULT_OPTIONS));
       wuffs_json__decoder__set_quirk_enabled(
-          &dec, WUFFS_JSON__QUIRK_ALLOW_TRAILING_NEW_LINE, q & 1);
+          &dec, WUFFS_JSON__QUIRK_ALLOW_TRAILING_FILLER, q & 1);
+      wuffs_json__decoder__set_quirk_enabled(
+          &dec, WUFFS_JSON__QUIRK_EXPECT_TRAILING_NEW_LINE_OR_EOF, q & 2);
 
       wuffs_base__token_buffer tok =
           wuffs_base__slice_token__writer(g_have_slice_token);
@@ -4134,8 +4199,8 @@
     test_wuffs_json_decode_quirk_allow_extra_comma,
     test_wuffs_json_decode_quirk_allow_inf_nan_numbers,
     test_wuffs_json_decode_quirk_allow_leading_etc,
-    test_wuffs_json_decode_quirk_allow_trailing_comment,
-    test_wuffs_json_decode_quirk_allow_trailing_new_line,
+    test_wuffs_json_decode_quirk_allow_trailing_comments,
+    test_wuffs_json_decode_quirk_allow_trailing_filler,
     test_wuffs_json_decode_quirk_replace_invalid_unicode,
     test_wuffs_json_decode_src_io_buffer_length,
     test_wuffs_json_decode_string,
commit	cd4cbc9102e7c58ecd6561d8c2d2055f76de4141	[log] [tgz]
author	Nigel Tao <nigeltao@golang.org>	Tue Sep 22 22:22:15 2020 +1000
committer	Nigel Tao <nigeltao@golang.org>	Tue Sep 22 23:03:49 2020 +1000
tree	a90bbba68b32fc5ce57768d6220260d9eb2c4f27
parent	3e8cb4ebf8a0b024549e1086c7ed65d5df0fe8fa [diff]