Optimize example/*json*'s indentation output

On a mid-range x86_64 laptop, processing the 181 MiB citylots.json file
from github.com/zemirco/sf-city-lots-json:

$ time gen/bin/example-jsonptr < citylots.json > /dev/null

Before this commit:
real 0m3.612s

After:
real 0m1.949s

Ratio: 1.85x

commit: 0a0c7d68aa868b622eacab59dc4d80e31b933ef1 [log] [tgz]
author: Nigel Tao <nigeltao@golang.org> Tue Aug 18 23:31:27 2020 +1000
committer: Nigel Tao <nigeltao@golang.org> Wed Aug 19 00:02:30 2020 +1000
tree: b8e4c2ff80ab37a4306418e95d861f67f79f4cd1
parent: 1864d9c01ab742be9abb4792dba42ea811b1222e [diff]
diff --git a/example/cbor-to-json/cbor-to-json.cc b/example/cbor-to-json/cbor-to-json.cc
index 81372d9..2a9a76d 100644
--- a/example/cbor-to-json/cbor-to-json.cc
+++ b/example/cbor-to-json/cbor-to-json.cc

@@ -120,10 +120,22 @@
 
 // ----
 
-// parse_flags enforces that g_flags.spaces <= 8 (the length of
-// INDENT_SPACES_STRING).
-#define INDENT_SPACES_STRING "        "
-#define INDENT_TAB_STRING "\t"
+#define NEW_LINE_THEN_256_SPACES                                               \
+  "\n                                                                        " \
+  "                                                                          " \
+  "                                                                          " \
+  "                                    "
+#define NEW_LINE_THEN_256_TABS                                                 \
+  "\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" \
+  "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" \
+  "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" \
+  "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" \
+  "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" \
+  "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" \
+  "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
+
+const char* g_new_line_then_256_indent_bytes;
+uint32_t g_bytes_per_indent_depth;
 
 uint8_t g_dst_array[32768];
 wuffs_base__io_buffer g_dst;
@@ -151,8 +163,9 @@
   bool output_cbor_metadata_as_comments;
   bool output_extra_comma;
   bool output_inf_nan_numbers;
-  size_t spaces;
   bool tabs;
+
+  uint32_t spaces;
 } g_flags = {0};
 
 std::string  //
@@ -274,17 +287,14 @@
     do {
       switch (g_ctx) {
         case context::none:
-          // No-op.
-          break;
+          goto skip_indentation;
         case context::in_list_after_bracket:
-          TRY(write_dst("\n", g_flags.compact_output ? 0 : 1));
           g_ctx = context::in_list_after_value;
           break;
         case context::in_list_after_value:
-          TRY(write_dst(",\n", g_flags.compact_output ? 1 : 2));
+          TRY(write_dst(",", 1));
           break;
         case context::in_dict_after_brace:
-          TRY(write_dst("\n", g_flags.compact_output ? 0 : 1));
           g_ctx = context::in_dict_after_key;
           break;
         case context::in_dict_after_key:
@@ -292,15 +302,16 @@
           g_ctx = context::in_dict_after_value;
           goto skip_indentation;
         case context::in_dict_after_value:
-          TRY(write_dst(",\n", g_flags.compact_output ? 1 : 2));
+          TRY(write_dst(",", 1));
           g_ctx = context::in_dict_after_key;
           break;
       }
 
       if (!g_flags.compact_output) {
-        for (size_t i = 0; i < g_depth; i++) {
-          TRY(write_dst(g_flags.tabs ? INDENT_TAB_STRING : INDENT_SPACES_STRING,
-                        g_flags.tabs ? 1 : g_flags.spaces));
+        uint32_t indent = g_depth * g_bytes_per_indent_depth;
+        TRY(write_dst(g_new_line_then_256_indent_bytes, 1 + (indent & 0xFF)));
+        for (indent >>= 8; indent > 0; indent--) {
+          TRY(write_dst(g_new_line_then_256_indent_bytes + 1, 0x100));
         }
       }
     } while (false);
@@ -581,13 +592,12 @@
     } else if ((g_ctx != context::in_list_after_bracket) &&
                (g_ctx != context::in_dict_after_brace)) {
       if (g_flags.output_extra_comma) {
-        TRY(write_dst(",\n", 2));
-      } else {
-        TRY(write_dst("\n", 1));
+        TRY(write_dst(",", 1));
       }
-      for (size_t i = 0; i < g_depth; i++) {
-        TRY(write_dst(g_flags.tabs ? INDENT_TAB_STRING : INDENT_SPACES_STRING,
-                      g_flags.tabs ? 1 : g_flags.spaces));
+      uint32_t indent = g_depth * g_bytes_per_indent_depth;
+      TRY(write_dst(g_new_line_then_256_indent_bytes, 1 + (indent & 0xFF)));
+      for (indent >>= 8; indent > 0; indent--) {
+        TRY(write_dst(g_new_line_then_256_indent_bytes + 1, 0x100));
       }
     }
     g_ctx = (flags & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_LIST)
@@ -610,6 +620,10 @@
 
   TRY(parse_flags(argc, argv));
 
+  g_new_line_then_256_indent_bytes =
+      g_flags.tabs ? NEW_LINE_THEN_256_TABS : NEW_LINE_THEN_256_SPACES;
+  g_bytes_per_indent_depth = g_flags.tabs ? 1 : g_flags.spaces;
+
   FILE* in = stdin;
   if (g_flags.remaining_argc > 1) {
     return g_usage;

diff --git a/example/jsonfindptrs/jsonfindptrs.cc b/example/jsonfindptrs/jsonfindptrs.cc
index a9fe5de..0d6930d 100644
--- a/example/jsonfindptrs/jsonfindptrs.cc
+++ b/example/jsonfindptrs/jsonfindptrs.cc

@@ -193,9 +193,11 @@
   int remaining_argc;
   char** remaining_argv;
 
-  uint32_t max_output_depth;
-  char* query_c_string;
   bool strict_json_pointer_syntax;
+
+  uint32_t max_output_depth;
+
+  char* query_c_string;
 } g_flags = {0};
 
 std::string  //

diff --git a/example/jsonptr/jsonptr.cc b/example/jsonptr/jsonptr.cc
index eab2cb2..6295483 100644
--- a/example/jsonptr/jsonptr.cc
+++ b/example/jsonptr/jsonptr.cc

@@ -270,10 +270,22 @@
 
 int g_input_file_descriptor = 0;  // A 0 default means stdin.
 
-// parse_flags enforces that g_flags.spaces <= 8 (the length of
-// INDENT_SPACES_STRING).
-#define INDENT_SPACES_STRING "        "
-#define INDENT_TAB_STRING "\t"
+#define NEW_LINE_THEN_256_SPACES                                               \
+  "\n                                                                        " \
+  "                                                                          " \
+  "                                                                          " \
+  "                                    "
+#define NEW_LINE_THEN_256_TABS                                                 \
+  "\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" \
+  "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" \
+  "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" \
+  "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" \
+  "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" \
+  "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" \
+  "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
+
+const char* g_new_line_then_256_indent_bytes;
+uint32_t g_bytes_per_indent_depth;
 
 #ifndef DST_BUFFER_ARRAY_SIZE
 #define DST_BUFFER_ARRAY_SIZE (32 * 1024)
@@ -581,12 +593,14 @@
   bool input_allow_comments;
   bool input_allow_extra_comma;
   bool input_allow_inf_nan_numbers;
-  uint32_t max_output_depth;
   bool output_extra_comma;
-  char* query_c_string;
-  size_t spaces;
   bool strict_json_pointer_syntax;
   bool tabs;
+
+  uint32_t max_output_depth;
+  uint32_t spaces;
+
+  char* query_c_string;
 } g_flags = {0};
 
 const char*  //
@@ -722,6 +736,10 @@
     return g_usage;
   }
 
+  g_new_line_then_256_indent_bytes =
+      g_flags.tabs ? NEW_LINE_THEN_256_TABS : NEW_LINE_THEN_256_SPACES;
+  g_bytes_per_indent_depth = g_flags.tabs ? 1 : g_flags.spaces;
+
   g_query.reset(g_flags.query_c_string);
 
   // If the query is non-empty, suppress writing to stdout until we've
@@ -923,14 +941,12 @@
             (g_ctx != context::in_dict_after_brace) &&
             !g_flags.compact_output) {
           if (g_flags.output_extra_comma) {
-            TRY(write_dst(",\n", 2));
-          } else {
-            TRY(write_dst("\n", 1));
+            TRY(write_dst(",", 1));
           }
-          for (uint32_t i = 0; i < g_depth; i++) {
-            TRY(write_dst(
-                g_flags.tabs ? INDENT_TAB_STRING : INDENT_SPACES_STRING,
-                g_flags.tabs ? 1 : g_flags.spaces));
+          uint32_t indent = g_depth * g_bytes_per_indent_depth;
+          TRY(write_dst(g_new_line_then_256_indent_bytes, 1 + (indent & 0xFF)));
+          for (indent >>= 8; indent > 0; indent--) {
+            TRY(write_dst(g_new_line_then_256_indent_bytes + 1, 0x100));
           }
         }
 
@@ -956,11 +972,10 @@
           TRY(write_dst(",", 1));
         }
         if (!g_flags.compact_output) {
-          TRY(write_dst("\n", 1));
-          for (size_t i = 0; i < g_depth; i++) {
-            TRY(write_dst(
-                g_flags.tabs ? INDENT_TAB_STRING : INDENT_SPACES_STRING,
-                g_flags.tabs ? 1 : g_flags.spaces));
+          uint32_t indent = g_depth * g_bytes_per_indent_depth;
+          TRY(write_dst(g_new_line_then_256_indent_bytes, 1 + (indent & 0xFF)));
+          for (indent >>= 8; indent > 0; indent--) {
+            TRY(write_dst(g_new_line_then_256_indent_bytes + 1, 0x100));
           }
         }
       }
commit	0a0c7d68aa868b622eacab59dc4d80e31b933ef1	[log] [tgz]
author	Nigel Tao <nigeltao@golang.org>	Tue Aug 18 23:31:27 2020 +1000
committer	Nigel Tao <nigeltao@golang.org>	Wed Aug 19 00:02:30 2020 +1000
tree	b8e4c2ff80ab37a4306418e95d861f67f79f4cd1
parent	1864d9c01ab742be9abb4792dba42ea811b1222e [diff]