Optimize example/*json*'s indentation output
On a mid-range x86_64 laptop, processing the 181 MiB citylots.json file
from github.com/zemirco/sf-city-lots-json:
$ time gen/bin/example-jsonptr < citylots.json > /dev/null
Before this commit:
real 0m3.612s
After:
real 0m1.949s
Ratio: 1.85x
diff --git a/example/cbor-to-json/cbor-to-json.cc b/example/cbor-to-json/cbor-to-json.cc
index 81372d9..2a9a76d 100644
--- a/example/cbor-to-json/cbor-to-json.cc
+++ b/example/cbor-to-json/cbor-to-json.cc
@@ -120,10 +120,22 @@
// ----
-// parse_flags enforces that g_flags.spaces <= 8 (the length of
-// INDENT_SPACES_STRING).
-#define INDENT_SPACES_STRING " "
-#define INDENT_TAB_STRING "\t"
+#define NEW_LINE_THEN_256_SPACES \
+ "\n                                                                " \
+ "                                                                " \
+ "                                                                " \
+ "                                                                "
+#define NEW_LINE_THEN_256_TABS \
+ "\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" \
+ "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" \
+ "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" \
+ "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" \
+ "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" \
+ "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" \
+ "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
+
+const char* g_new_line_then_256_indent_bytes;
+uint32_t g_bytes_per_indent_depth;
uint8_t g_dst_array[32768];
wuffs_base__io_buffer g_dst;
@@ -151,8 +163,9 @@
bool output_cbor_metadata_as_comments;
bool output_extra_comma;
bool output_inf_nan_numbers;
- size_t spaces;
bool tabs;
+
+ uint32_t spaces;
} g_flags = {0};
std::string //
@@ -274,17 +287,14 @@
do {
switch (g_ctx) {
case context::none:
- // No-op.
- break;
+ goto skip_indentation;
case context::in_list_after_bracket:
- TRY(write_dst("\n", g_flags.compact_output ? 0 : 1));
g_ctx = context::in_list_after_value;
break;
case context::in_list_after_value:
- TRY(write_dst(",\n", g_flags.compact_output ? 1 : 2));
+ TRY(write_dst(",", 1));
break;
case context::in_dict_after_brace:
- TRY(write_dst("\n", g_flags.compact_output ? 0 : 1));
g_ctx = context::in_dict_after_key;
break;
case context::in_dict_after_key:
@@ -292,15 +302,16 @@
g_ctx = context::in_dict_after_value;
goto skip_indentation;
case context::in_dict_after_value:
- TRY(write_dst(",\n", g_flags.compact_output ? 1 : 2));
+ TRY(write_dst(",", 1));
g_ctx = context::in_dict_after_key;
break;
}
if (!g_flags.compact_output) {
- for (size_t i = 0; i < g_depth; i++) {
- TRY(write_dst(g_flags.tabs ? INDENT_TAB_STRING : INDENT_SPACES_STRING,
- g_flags.tabs ? 1 : g_flags.spaces));
+ uint32_t indent = g_depth * g_bytes_per_indent_depth;
+ TRY(write_dst(g_new_line_then_256_indent_bytes, 1 + (indent & 0xFF)));
+ for (indent >>= 8; indent > 0; indent--) {
+ TRY(write_dst(g_new_line_then_256_indent_bytes + 1, 0x100));
}
}
} while (false);
@@ -581,13 +592,12 @@
} else if ((g_ctx != context::in_list_after_bracket) &&
(g_ctx != context::in_dict_after_brace)) {
if (g_flags.output_extra_comma) {
- TRY(write_dst(",\n", 2));
- } else {
- TRY(write_dst("\n", 1));
+ TRY(write_dst(",", 1));
}
- for (size_t i = 0; i < g_depth; i++) {
- TRY(write_dst(g_flags.tabs ? INDENT_TAB_STRING : INDENT_SPACES_STRING,
- g_flags.tabs ? 1 : g_flags.spaces));
+ uint32_t indent = g_depth * g_bytes_per_indent_depth;
+ TRY(write_dst(g_new_line_then_256_indent_bytes, 1 + (indent & 0xFF)));
+ for (indent >>= 8; indent > 0; indent--) {
+ TRY(write_dst(g_new_line_then_256_indent_bytes + 1, 0x100));
}
}
g_ctx = (flags & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_LIST)
@@ -610,6 +620,10 @@
TRY(parse_flags(argc, argv));
+ g_new_line_then_256_indent_bytes =
+ g_flags.tabs ? NEW_LINE_THEN_256_TABS : NEW_LINE_THEN_256_SPACES;
+ g_bytes_per_indent_depth = g_flags.tabs ? 1 : g_flags.spaces;
+
FILE* in = stdin;
if (g_flags.remaining_argc > 1) {
return g_usage;
diff --git a/example/jsonfindptrs/jsonfindptrs.cc b/example/jsonfindptrs/jsonfindptrs.cc
index a9fe5de..0d6930d 100644
--- a/example/jsonfindptrs/jsonfindptrs.cc
+++ b/example/jsonfindptrs/jsonfindptrs.cc
@@ -193,9 +193,11 @@
int remaining_argc;
char** remaining_argv;
- uint32_t max_output_depth;
- char* query_c_string;
bool strict_json_pointer_syntax;
+
+ uint32_t max_output_depth;
+
+ char* query_c_string;
} g_flags = {0};
std::string //
diff --git a/example/jsonptr/jsonptr.cc b/example/jsonptr/jsonptr.cc
index eab2cb2..6295483 100644
--- a/example/jsonptr/jsonptr.cc
+++ b/example/jsonptr/jsonptr.cc
@@ -270,10 +270,22 @@
int g_input_file_descriptor = 0; // A 0 default means stdin.
-// parse_flags enforces that g_flags.spaces <= 8 (the length of
-// INDENT_SPACES_STRING).
-#define INDENT_SPACES_STRING " "
-#define INDENT_TAB_STRING "\t"
+#define NEW_LINE_THEN_256_SPACES \
+ "\n                                                                " \
+ "                                                                " \
+ "                                                                " \
+ "                                                                "
+#define NEW_LINE_THEN_256_TABS \
+ "\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" \
+ "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" \
+ "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" \
+ "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" \
+ "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" \
+ "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" \
+ "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
+
+const char* g_new_line_then_256_indent_bytes;
+uint32_t g_bytes_per_indent_depth;
#ifndef DST_BUFFER_ARRAY_SIZE
#define DST_BUFFER_ARRAY_SIZE (32 * 1024)
@@ -581,12 +593,14 @@
bool input_allow_comments;
bool input_allow_extra_comma;
bool input_allow_inf_nan_numbers;
- uint32_t max_output_depth;
bool output_extra_comma;
- char* query_c_string;
- size_t spaces;
bool strict_json_pointer_syntax;
bool tabs;
+
+ uint32_t max_output_depth;
+ uint32_t spaces;
+
+ char* query_c_string;
} g_flags = {0};
const char* //
@@ -722,6 +736,10 @@
return g_usage;
}
+ g_new_line_then_256_indent_bytes =
+ g_flags.tabs ? NEW_LINE_THEN_256_TABS : NEW_LINE_THEN_256_SPACES;
+ g_bytes_per_indent_depth = g_flags.tabs ? 1 : g_flags.spaces;
+
g_query.reset(g_flags.query_c_string);
// If the query is non-empty, suppress writing to stdout until we've
@@ -923,14 +941,12 @@
(g_ctx != context::in_dict_after_brace) &&
!g_flags.compact_output) {
if (g_flags.output_extra_comma) {
- TRY(write_dst(",\n", 2));
- } else {
- TRY(write_dst("\n", 1));
+ TRY(write_dst(",", 1));
}
- for (uint32_t i = 0; i < g_depth; i++) {
- TRY(write_dst(
- g_flags.tabs ? INDENT_TAB_STRING : INDENT_SPACES_STRING,
- g_flags.tabs ? 1 : g_flags.spaces));
+ uint32_t indent = g_depth * g_bytes_per_indent_depth;
+ TRY(write_dst(g_new_line_then_256_indent_bytes, 1 + (indent & 0xFF)));
+ for (indent >>= 8; indent > 0; indent--) {
+ TRY(write_dst(g_new_line_then_256_indent_bytes + 1, 0x100));
}
}
@@ -956,11 +972,10 @@
TRY(write_dst(",", 1));
}
if (!g_flags.compact_output) {
- TRY(write_dst("\n", 1));
- for (size_t i = 0; i < g_depth; i++) {
- TRY(write_dst(
- g_flags.tabs ? INDENT_TAB_STRING : INDENT_SPACES_STRING,
- g_flags.tabs ? 1 : g_flags.spaces));
+ uint32_t indent = g_depth * g_bytes_per_indent_depth;
+ TRY(write_dst(g_new_line_then_256_indent_bytes, 1 + (indent & 0xFF)));
+ for (indent >>= 8; indent > 0; indent--) {
+ TRY(write_dst(g_new_line_then_256_indent_bytes + 1, 0x100));
}
}
}