Let example/jsonptr output comments

Performance (on vanilla, comment-free JSON) slips a little. On a mid-range x86_64 laptop, processing the 181 MiB citylots.json file from github.com/zemirco/sf-city-lots-json:

$ time gen/bin/example-jsonptr < citylots.json > /dev/null

Before this commit: real 0m1.863s
After: real 0m1.911s
Ratio: 0.97x
Inverse: 1.03x

commit: 2104205d253fc4a31f16cd8e957dd132676c1d63 [log] [tgz]
author: Nigel Tao <nigeltao@golang.org> Wed Aug 19 23:13:54 2020 +1000
committer: Nigel Tao <nigeltao@golang.org> Thu Aug 20 11:14:29 2020 +1000
tree: 03f322f97abc4d84b1d7f5fa1083f0b7c27f0dec
parent: 6b86cbcb2547105a1c598f2f1da44e640c69b9ec [diff]
diff --git a/example/jsonptr/jsonptr.cc b/example/jsonptr/jsonptr.cc
index 3064ef0..2052576 100644
--- a/example/jsonptr/jsonptr.cc
+++ b/example/jsonptr/jsonptr.cc

@@ -158,6 +158,8 @@
     "            -input-allow-comments\n"
     "            -input-allow-extra-comma\n"
     "            -input-allow-inf-nan-numbers\n"
+    "            -jwcc\n"
+    "            -output-comments\n"
     "            -output-extra-comma\n"
     "            -strict-json-pointer-syntax\n"
     "\n"
@@ -179,7 +181,7 @@
     "\n"
     "The -input-allow-comments flag allows \"/*slash-star*/\" and\n"
     "\"//slash-slash\" C-style comments within JSON input. Such comments are\n"
-    "stripped from the output.\n"
+    "stripped from the output unless -output-comments was also set.\n"
     "\n"
     "The -input-allow-extra-comma flag allows input like \"[1,2,]\", with a\n"
     "comma after the final element of a JSON list or dictionary.\n"
@@ -187,12 +189,24 @@
     "The -input-allow-inf-nan-numbers flag allows non-finite floating point\n"
     "numbers (infinities and not-a-numbers) within JSON input.\n"
     "\n"
+    "The -output-comments flag copies any input comments to the output. It\n"
+    "has no effect unless -input-allow-comments was also set. Comments look\n"
+    "better after commas than before them, but a closing \"]\" or \"}\" can\n"
+    "occur after arbitrarily many comments, so -output-comments also requires\n"
+    "that one or both of -compact-output and -output-extra-comma be set.\n"
+    "\n"
     "The -output-extra-comma flag writes output like \"[1,2,]\", with a comma\n"
     "after the final element of a JSON list or dictionary. Such commas are\n"
     "non-compliant with the JSON specification but many parsers accept them\n"
     "and they can produce simpler line-based diffs. This flag is ignored when\n"
     "-compact-output is set.\n"
     "\n"
+    "The -jwcc flag (JSON With Commas and Comments) enables all of:\n"
+    "            -input-allow-comments\n"
+    "            -input-allow-extra-comma\n"
+    "            -output-comments\n"
+    "            -output-extra-comma\n"
+    "\n"
     "----\n"
     "\n"
     "The -q=STR or -query=STR flag gives an optional JSON Pointer query, to\n"
@@ -326,6 +340,8 @@
          (g_ctx == context::in_dict_after_value);
 }
 
+bool g_is_after_comment;
+
 uint32_t g_suppress_write_dst;
 bool g_wrote_to_dst;
 
@@ -592,6 +608,7 @@
   bool input_allow_comments;
   bool input_allow_extra_comma;
   bool input_allow_inf_nan_numbers;
+  bool output_comments;
   bool output_extra_comma;
   bool strict_json_pointer_syntax;
   bool tabs;
@@ -663,6 +680,17 @@
       g_flags.input_allow_inf_nan_numbers = true;
       continue;
     }
+    if (!strcmp(arg, "jwcc")) {
+      g_flags.input_allow_comments = true;
+      g_flags.input_allow_extra_comma = true;
+      g_flags.output_comments = true;
+      g_flags.output_extra_comma = true;
+      continue;
+    }
+    if (!strcmp(arg, "output-comments")) {
+      g_flags.output_comments = true;
+      continue;
+    }
     if (!strcmp(arg, "output-extra-comma")) {
       g_flags.output_extra_comma = true;
       continue;
@@ -725,10 +753,17 @@
 
   g_ctx = context::none;
 
+  g_is_after_comment = false;
+
   TRY(parse_flags(argc, argv));
   if (g_flags.fail_if_unsandboxed && !g_sandboxed) {
     return "main: unsandboxed";
   }
+  if (g_flags.output_comments && !g_flags.compact_output &&
+      !g_flags.output_extra_comma) {
+    return "main: -output-comments requires one or both of -compact-output and "
+           "-output-extra-comma";
+  }
   const int stdin_fd = 0;
   if (g_flags.remaining_argc >
       ((g_input_file_descriptor != stdin_fd) ? 1 : 0)) {
@@ -858,6 +893,26 @@
   return write_dst_slow(s, n);
 }
 
+#define TRY_INDENT_WITH_LEADING_NEW_LINE                                   \
+  do {                                                                     \
+    uint32_t indent = g_depth * g_bytes_per_indent_depth;                  \
+    TRY(write_dst(g_new_line_then_256_indent_bytes, 1 + (indent & 0xFF))); \
+    for (indent >>= 8; indent > 0; indent--) {                             \
+      TRY(write_dst(g_new_line_then_256_indent_bytes + 1, 0x100));         \
+    }                                                                      \
+  } while (false)
+
+// TRY_INDENT_SANS_LEADING_NEW_LINE is used after comments, which print their
+// own "\n".
+#define TRY_INDENT_SANS_LEADING_NEW_LINE                                   \
+  do {                                                                     \
+    uint32_t indent = g_depth * g_bytes_per_indent_depth;                  \
+    TRY(write_dst(g_new_line_then_256_indent_bytes + 1, (indent & 0xFF))); \
+    for (indent >>= 8; indent > 0; indent--) {                             \
+      TRY(write_dst(g_new_line_then_256_indent_bytes + 1, 0x100));         \
+    }                                                                      \
+  } while (false)
+
 // ----
 
 uint8_t  //
@@ -950,13 +1005,13 @@
         if ((g_ctx != context::in_list_after_bracket) &&
             (g_ctx != context::in_dict_after_brace) &&
             !g_flags.compact_output) {
-          if (g_flags.output_extra_comma) {
-            TRY(write_dst(",", 1));
-          }
-          uint32_t indent = g_depth * g_bytes_per_indent_depth;
-          TRY(write_dst(g_new_line_then_256_indent_bytes, 1 + (indent & 0xFF)));
-          for (indent >>= 8; indent > 0; indent--) {
-            TRY(write_dst(g_new_line_then_256_indent_bytes + 1, 0x100));
+          if (g_is_after_comment) {
+            TRY_INDENT_SANS_LEADING_NEW_LINE;
+          } else {
+            if (g_flags.output_extra_comma) {
+              TRY(write_dst(",", 1));
+            }
+            TRY_INDENT_WITH_LEADING_NEW_LINE;
           }
         }
 
@@ -974,7 +1029,9 @@
     // Write preceding whitespace and punctuation, if it wasn't ']', '}' or a
     // continuation of a multi-token chain.
     if (start_of_token_chain) {
-      if (g_ctx == context::in_dict_after_key) {
+      if (g_is_after_comment) {
+        TRY_INDENT_SANS_LEADING_NEW_LINE;
+      } else if (g_ctx == context::in_dict_after_key) {
         TRY(write_dst(": ", g_flags.compact_output ? 1 : 2));
       } else if (g_ctx != context::none) {
         if ((g_ctx != context::in_list_after_bracket) &&
@@ -982,11 +1039,7 @@
           TRY(write_dst(",", 1));
         }
         if (!g_flags.compact_output) {
-          uint32_t indent = g_depth * g_bytes_per_indent_depth;
-          TRY(write_dst(g_new_line_then_256_indent_bytes, 1 + (indent & 0xFF)));
-          for (indent >>= 8; indent > 0; indent--) {
-            TRY(write_dst(g_new_line_then_256_indent_bytes + 1, 0x100));
-          }
+          TRY_INDENT_WITH_LEADING_NEW_LINE;
         }
       }
 
@@ -1130,13 +1183,47 @@
       }
       g_cursor_index += token_length;
 
-      // Skip filler tokens (e.g. whitespace).
+      // Handle filler tokens (e.g. whitespace, punctuation and comments).
+      // These are skipped, unless -output-comments is enabled.
       if (t.value_base_category() == WUFFS_BASE__TOKEN__VBC__FILLER) {
+        if (g_flags.output_comments &&
+            (t.value_base_detail() &
+             WUFFS_BASE__TOKEN__VBD__FILLER__COMMENT_ANY)) {
+          if (g_flags.compact_output) {
+            TRY(write_dst(g_src.data.ptr + g_cursor_index - token_length,
+                          token_length));
+          } else {
+            if (start_of_token_chain) {
+              if (g_is_after_comment) {
+                TRY_INDENT_SANS_LEADING_NEW_LINE;
+              } else if (g_ctx != context::none) {
+                if (g_ctx == context::in_dict_after_key) {
+                  TRY(write_dst(":", 1));
+                } else if ((g_ctx != context::in_list_after_bracket) &&
+                           (g_ctx != context::in_dict_after_brace)) {
+                  TRY(write_dst(",", 1));
+                }
+                if (!g_flags.compact_output) {
+                  TRY_INDENT_WITH_LEADING_NEW_LINE;
+                }
+              }
+            }
+            TRY(write_dst(g_src.data.ptr + g_cursor_index - token_length,
+                          token_length));
+            if (!t.continued() &&
+                (t.value_base_detail() &
+                 WUFFS_BASE__TOKEN__VBD__FILLER__COMMENT_BLOCK)) {
+              TRY(write_dst("\n", 1));
+            }
+            g_is_after_comment = true;
+          }
+        }
         start_of_token_chain = !t.continued();
         continue;
       }
 
       const char* z = handle_token(t, start_of_token_chain);
+      g_is_after_comment = false;
       start_of_token_chain = !t.continued();
       if (z == nullptr) {
         continue;
commit	2104205d253fc4a31f16cd8e957dd132676c1d63	[log] [tgz]
author	Nigel Tao <nigeltao@golang.org>	Wed Aug 19 23:13:54 2020 +1000
committer	Nigel Tao <nigeltao@golang.org>	Thu Aug 20 11:14:29 2020 +1000
tree	03f322f97abc4d84b1d7f5fa1083f0b7c27f0dec
parent	6b86cbcb2547105a1c598f2f1da44e640c69b9ec [diff]