Optimize f64conv's high_prec_dec__parse
Binary size, before:
27336 gen/lib/c/clang-5.0-dynamic/wuffs-base-f64conv.lo
27528 gen/lib/c/clang-5.0-static/wuffs-base-f64conv.o
28856 gen/lib/c/gcc-dynamic/wuffs-base-f64conv.lo
28856 gen/lib/c/gcc-static/wuffs-base-f64conv.o
After:
27704 gen/lib/c/clang-5.0-dynamic/wuffs-base-f64conv.lo
27896 gen/lib/c/clang-5.0-static/wuffs-base-f64conv.o
29120 gen/lib/c/gcc-dynamic/wuffs-base-f64conv.lo
29120 gen/lib/c/gcc-static/wuffs-base-f64conv.o
$ g++ -O3 script/process-json-numbers.c
$ time ./a.out -parse-number-f64 < citylots.json
Before:
real 0m1.421s
After:
real 0m1.306s
citylots.json is from github.com/zemirco/sf-city-lots-json
name old time/op new time/op delta
wuffs_strconv_parse_number_f64_1_lsh53_add0/clang5 99.0ns ± 0% 73.2ns ± 2% -26.06% (p=0.000 n=4+5)
wuffs_strconv_parse_number_f64_1_lsh53_add1/clang5 744ns ± 1% 688ns ± 0% -7.42% (p=0.008 n=5+5)
wuffs_strconv_parse_number_f64_pi_long/clang5 161ns ± 0% 113ns ± 1% -29.76% (p=0.008 n=5+5)
wuffs_strconv_parse_number_f64_pi_short/clang5 48.0ns ± 0% 35.4ns ± 7% -26.25% (p=0.016 n=4+5)
wuffs_strconv_parse_number_f64_1_lsh53_add0/gcc7 94.8ns ± 1% 67.0ns ± 0% -29.32% (p=0.000 n=5+4)
wuffs_strconv_parse_number_f64_1_lsh53_add1/gcc7 747ns ± 1% 721ns ± 0% -3.51% (p=0.008 n=5+5)
wuffs_strconv_parse_number_f64_pi_long/gcc7 161ns ± 0% 90ns ± 1% -43.99% (p=0.008 n=5+5)
wuffs_strconv_parse_number_f64_pi_short/gcc7 48.4ns ± 5% 32.2ns ± 6% -33.47% (p=0.008 n=5+5)
diff --git a/internal/cgen/base/f64conv-submodule.c b/internal/cgen/base/f64conv-submodule.c
index 3895fdf..05fff67 100644
--- a/internal/cgen/base/f64conv-submodule.c
+++ b/internal/cgen/base/f64conv-submodule.c
@@ -146,10 +146,12 @@
uint8_t* p = s.ptr;
uint8_t* q = s.ptr + s.len;
- for (; (p < q) && (*p == '_'); p++) {
- }
- if (p >= q) {
- return wuffs_base__make_status(wuffs_base__error__bad_argument);
+ for (;; p++) {
+ if (p >= q) {
+ return wuffs_base__make_status(wuffs_base__error__bad_argument);
+ } else if (*p != '_') {
+ break;
+ }
}
// Parse sign.
@@ -162,79 +164,110 @@
} else {
break;
}
- for (; (p < q) && (*p == '_'); p++) {
+ for (;; p++) {
+ if (p >= q) {
+ return wuffs_base__make_status(wuffs_base__error__bad_argument);
+ } else if (*p != '_') {
+ break;
+ }
}
} while (0);
- // Parse digits.
+ // Parse digits, up to (and including) a '.', 'E' or 'e'. Examples for each
+ // limb in this if-else chain:
+ // - "0.789"
+ // - "1002.789"
+ // - ".789"
+ // - Other (invalid input).
uint32_t nd = 0;
int32_t dp = 0;
- bool saw_digits = false;
- bool saw_non_zero_digits = false;
- bool saw_dot = false;
- for (; p < q; p++) {
- if (*p == '_') {
- // No-op.
-
- } else if ((*p == '.') || (*p == ',')) {
- // As per https://en.wikipedia.org/wiki/Decimal_separator, both '.' or
- // ',' are commonly used. We just parse either, regardless of LOCALE.
- if (saw_dot) {
+ bool no_digits_before_separator = false;
+ if ('0' == *p) {
+ p++;
+ for (;; p++) {
+ if (p >= q) {
+ goto after_all;
+ } else if ((*p == '.') || (*p == ',')) {
+ p++;
+ goto after_sep;
+ } else if ((*p == 'E') || (*p == 'e')) {
+ p++;
+ goto after_exp;
+ } else if (*p != '_') {
return wuffs_base__make_status(wuffs_base__error__bad_argument);
}
- saw_dot = true;
- dp = (int32_t)nd;
+ }
+ } else if (('0' < *p) && (*p <= '9')) {
+ h->digits[nd++] = (uint8_t)(*p - '0');
+ dp = (int32_t)nd;
+ p++;
+ for (;; p++) {
+ if (p >= q) {
+ goto after_all;
+ } else if (('0' <= *p) && (*p <= '9')) {
+ if (nd < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
+ h->digits[nd++] = (uint8_t)(*p - '0');
+ dp = (int32_t)nd;
+ } else if ('0' != *p) {
+ // Long-tail non-zeroes set the truncated bit.
+ h->truncated = true;
+ }
+ } else if ((*p == '.') || (*p == ',')) {
+ p++;
+ goto after_sep;
+ } else if ((*p == 'E') || (*p == 'e')) {
+ p++;
+ goto after_exp;
+ } else if (*p != '_') {
+ return wuffs_base__make_status(wuffs_base__error__bad_argument);
+ }
+ }
+
+ } else if ((*p == '.') || (*p == ',')) {
+ p++;
+ no_digits_before_separator = true;
+
+ } else {
+ return wuffs_base__make_status(wuffs_base__error__bad_argument);
+ }
+
+after_sep:
+ for (;; p++) {
+ if (p >= q) {
+ goto after_all;
} else if ('0' == *p) {
- if (!saw_dot && !saw_non_zero_digits && saw_digits) {
- // We don't allow unnecessary leading zeroes: "000123" or "0644".
- return wuffs_base__make_status(wuffs_base__error__bad_argument);
- }
- saw_digits = true;
if (nd == 0) {
// Track leading zeroes implicitly.
dp--;
} else if (nd <
WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
- h->digits[nd++] = 0;
- } else {
- // Long-tail zeroes are ignored.
+ h->digits[nd++] = (uint8_t)(*p - '0');
}
-
} else if (('0' < *p) && (*p <= '9')) {
- if (!saw_dot && !saw_non_zero_digits && saw_digits) {
- // We don't allow unnecessary leading zeroes: "000123" or "0644".
- return wuffs_base__make_status(wuffs_base__error__bad_argument);
- }
- saw_digits = true;
- saw_non_zero_digits = true;
if (nd < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
h->digits[nd++] = (uint8_t)(*p - '0');
} else {
// Long-tail non-zeroes set the truncated bit.
h->truncated = true;
}
-
- } else {
- break;
- }
- }
-
- if (!saw_digits) {
- return wuffs_base__make_status(wuffs_base__error__bad_argument);
- }
- if (!saw_dot) {
- dp = (int32_t)nd;
- }
-
- // Parse exponent.
- if ((p < q) && ((*p == 'E') || (*p == 'e'))) {
- p++;
- for (; (p < q) && (*p == '_'); p++) {
- }
- if (p >= q) {
+ } else if ((*p == 'E') || (*p == 'e')) {
+ p++;
+ goto after_exp;
+ } else if (*p != '_') {
return wuffs_base__make_status(wuffs_base__error__bad_argument);
}
+ }
+
+after_exp:
+ do {
+ for (;; p++) {
+ if (p >= q) {
+ return wuffs_base__make_status(wuffs_base__error__bad_argument);
+ } else if (*p != '_') {
+ break;
+ }
+ }
int32_t exp_sign = +1;
if (*p == '+') {
@@ -265,14 +298,17 @@
return wuffs_base__make_status(wuffs_base__error__bad_argument);
}
dp += exp_sign * exp;
- }
+ } while (0);
- // Finish.
+after_all:
if (p != q) {
return wuffs_base__make_status(wuffs_base__error__bad_argument);
}
h->num_digits = nd;
if (nd == 0) {
+ if (no_digits_before_separator) {
+ return wuffs_base__make_status(wuffs_base__error__bad_argument);
+ }
h->decimal_point = 0;
} else if (dp <
-WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {
diff --git a/internal/cgen/data/data.go b/internal/cgen/data/data.go
index 2c5563b..f29bcd0 100644
--- a/internal/cgen/data/data.go
+++ b/internal/cgen/data/data.go
@@ -33,11 +33,12 @@
"e ±2047\n// bounds are further away from zero than ±(324 + 800), where 800 and 2047 is\n// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION and\n// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE.\n//\n// digits[.. num_digits] are the number's digits in big-endian order. The\n// uint8_t values are in the range [0 ..= 9], not ['0' ..= '9'], where e.g. '7'\n// is the ASCII value 0x37.\n//\n// decimal_point is the index (within digits) of the decimal point. It may be\n// negative or be larger than num_digits, in which case the explicit digits are\n// padded with implicit zeroes.\n//\n// For example, if num_digits is 3 and digits is \"\\x07\\x08\\x09\":\n// - A decimal_point of -2 means \".00789\"\n// - A decimal_point of -1 means \".0789\"\n// - A decimal_point of +0 means \".789\"\n// - A decimal_point of +1 means \"7.89\"\n// - A decimal_point of +2 means \"78.9\"\n// - A decimal_point of +3 means \"789.\"\n// - A decimal_point of +4 means \"7890.\"\n// - A decimal_point of +5 means \"78900.\"\n//\n// As above, a" +
" decimal_point higher than +2047 means that the overall value is\n// infinity, lower than -2047 means zero.\n//\n// negative is a sign bit. An HPD can distinguish positive and negative zero.\n//\n// truncated is whether there are more than\n// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION digits, and at\n// least one of those extra digits are non-zero. The existence of long-tail\n// digits can affect rounding.\n//\n// The \"all fields are zero\" value is valid, and represents the number +0.\ntypedef struct {\n uint32_t num_digits;\n int32_t decimal_point;\n bool negative;\n bool truncated;\n uint8_t digits[WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION];\n} wuffs_base__private_implementation__high_prec_dec;\n\n// wuffs_base__private_implementation__high_prec_dec__trim trims trailing\n// zeroes from the h->digits[.. h->num_digits] slice. They have no benefit,\n// since we explicitly track h->decimal_point.\n//\n// Preconditions:\n// - h is non-NULL.\nstatic inline void //\nwuffs_base__private_implementation_" +
"_high_prec_dec__trim(\n wuffs_base__private_implementation__high_prec_dec* h) {\n while ((h->num_digits > 0) && (h->digits[h->num_digits - 1] == 0)) {\n h->num_digits--;\n }\n}\n\n// wuffs_base__private_implementation__high_prec_dec__assign sets h to\n// represent the number x.\n//\n// Preconditions:\n// - h is non-NULL.\nstatic void //\nwuffs_base__private_implementation__high_prec_dec__assign(\n wuffs_base__private_implementation__high_prec_dec* h,\n uint64_t x,\n bool negative) {\n uint32_t n = 0;\n\n // Set h->digits.\n if (x > 0) {\n // Calculate the digits, working right-to-left. After we determine n (how\n // many digits there are), copy from buf to h->digits.\n //\n // UINT64_MAX, 18446744073709551615, is 20 digits long. It can be faster to\n // copy a constant number of bytes than a variable number (20 instead of\n // n). Make buf large enough (and start writing to it from the middle) so\n // that can we always copy 20 bytes: the slice buf[(20-n) .. (40-n)].\n uint8_t buf[40] = {0};" +
- "\n uint8_t* ptr = &buf[20];\n do {\n uint64_t remaining = x / 10;\n x -= remaining * 10;\n ptr--;\n *ptr = (uint8_t)x;\n n++;\n x = remaining;\n } while (x > 0);\n memcpy(h->digits, ptr, 20);\n }\n\n // Set h's other fields.\n h->num_digits = n;\n h->decimal_point = (int32_t)n;\n h->negative = negative;\n h->truncated = false;\n wuffs_base__private_implementation__high_prec_dec__trim(h);\n}\n\nstatic wuffs_base__status //\nwuffs_base__private_implementation__high_prec_dec__parse(\n wuffs_base__private_implementation__high_prec_dec* h,\n wuffs_base__slice_u8 s) {\n if (!h) {\n return wuffs_base__make_status(wuffs_base__error__bad_receiver);\n }\n h->num_digits = 0;\n h->decimal_point = 0;\n h->negative = false;\n h->truncated = false;\n\n uint8_t* p = s.ptr;\n uint8_t* q = s.ptr + s.len;\n\n for (; (p < q) && (*p == '_'); p++) {\n }\n if (p >= q) {\n return wuffs_base__make_status(wuffs_base__error__bad_argument);\n }\n\n // Parse sign.\n do {\n if (*p == '+') {\n p++;\n " +
- " } else if (*p == '-') {\n h->negative = true;\n p++;\n } else {\n break;\n }\n for (; (p < q) && (*p == '_'); p++) {\n }\n } while (0);\n\n // Parse digits.\n uint32_t nd = 0;\n int32_t dp = 0;\n bool saw_digits = false;\n bool saw_non_zero_digits = false;\n bool saw_dot = false;\n for (; p < q; p++) {\n if (*p == '_') {\n // No-op.\n\n } else if ((*p == '.') || (*p == ',')) {\n // As per https://en.wikipedia.org/wiki/Decimal_separator, both '.' or\n // ',' are commonly used. We just parse either, regardless of LOCALE.\n if (saw_dot) {\n return wuffs_base__make_status(wuffs_base__error__bad_argument);\n }\n saw_dot = true;\n dp = (int32_t)nd;\n\n } else if ('0' == *p) {\n if (!saw_dot && !saw_non_zero_digits && saw_digits) {\n // We don't allow unnecessary leading zeroes: \"000123\" or \"0644\".\n return wuffs_base__make_status(wuffs_base__error__bad_argument);\n }\n saw_digits = true;\n if (nd == 0) {\n // Track leading " +
- "zeroes implicitly.\n dp--;\n } else if (nd <\n WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {\n h->digits[nd++] = 0;\n } else {\n // Long-tail zeroes are ignored.\n }\n\n } else if (('0' < *p) && (*p <= '9')) {\n if (!saw_dot && !saw_non_zero_digits && saw_digits) {\n // We don't allow unnecessary leading zeroes: \"000123\" or \"0644\".\n return wuffs_base__make_status(wuffs_base__error__bad_argument);\n }\n saw_digits = true;\n saw_non_zero_digits = true;\n if (nd < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {\n h->digits[nd++] = (uint8_t)(*p - '0');\n } else {\n // Long-tail non-zeroes set the truncated bit.\n h->truncated = true;\n }\n\n } else {\n break;\n }\n }\n\n if (!saw_digits) {\n return wuffs_base__make_status(wuffs_base__error__bad_argument);\n }\n if (!saw_dot) {\n dp = (int32_t)nd;\n }\n\n // Parse exponent.\n if ((p < q) && ((*p == 'E') || (*p == 'e'))) {\n " +
- " p++;\n for (; (p < q) && (*p == '_'); p++) {\n }\n if (p >= q) {\n return wuffs_base__make_status(wuffs_base__error__bad_argument);\n }\n\n int32_t exp_sign = +1;\n if (*p == '+') {\n p++;\n } else if (*p == '-') {\n exp_sign = -1;\n p++;\n }\n\n int32_t exp = 0;\n const int32_t exp_large =\n WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE +\n WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION;\n bool saw_exp_digits = false;\n for (; p < q; p++) {\n if (*p == '_') {\n // No-op.\n } else if (('0' <= *p) && (*p <= '9')) {\n saw_exp_digits = true;\n if (exp < exp_large) {\n exp = (10 * exp) + ((int32_t)(*p - '0'));\n }\n } else {\n break;\n }\n }\n if (!saw_exp_digits) {\n return wuffs_base__make_status(wuffs_base__error__bad_argument);\n }\n dp += exp_sign * exp;\n }\n\n // Finish.\n if (p != q) {\n return wuffs_base__make_status(wuffs_base__error__bad_argument);\n }\n h->n" +
- "um_digits = nd;\n if (nd == 0) {\n h->decimal_point = 0;\n } else if (dp <\n -WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {\n h->decimal_point =\n -WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE - 1;\n } else if (dp >\n +WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {\n h->decimal_point =\n +WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE + 1;\n } else {\n h->decimal_point = dp;\n }\n wuffs_base__private_implementation__high_prec_dec__trim(h);\n return wuffs_base__make_status(NULL);\n}\n\n" +
+ "\n uint8_t* ptr = &buf[20];\n do {\n uint64_t remaining = x / 10;\n x -= remaining * 10;\n ptr--;\n *ptr = (uint8_t)x;\n n++;\n x = remaining;\n } while (x > 0);\n memcpy(h->digits, ptr, 20);\n }\n\n // Set h's other fields.\n h->num_digits = n;\n h->decimal_point = (int32_t)n;\n h->negative = negative;\n h->truncated = false;\n wuffs_base__private_implementation__high_prec_dec__trim(h);\n}\n\nstatic wuffs_base__status //\nwuffs_base__private_implementation__high_prec_dec__parse(\n wuffs_base__private_implementation__high_prec_dec* h,\n wuffs_base__slice_u8 s) {\n if (!h) {\n return wuffs_base__make_status(wuffs_base__error__bad_receiver);\n }\n h->num_digits = 0;\n h->decimal_point = 0;\n h->negative = false;\n h->truncated = false;\n\n uint8_t* p = s.ptr;\n uint8_t* q = s.ptr + s.len;\n\n for (;; p++) {\n if (p >= q) {\n return wuffs_base__make_status(wuffs_base__error__bad_argument);\n } else if (*p != '_') {\n break;\n }\n }\n\n // Parse sign.\n do {\n if (*p" +
+ " == '+') {\n p++;\n } else if (*p == '-') {\n h->negative = true;\n p++;\n } else {\n break;\n }\n for (;; p++) {\n if (p >= q) {\n return wuffs_base__make_status(wuffs_base__error__bad_argument);\n } else if (*p != '_') {\n break;\n }\n }\n } while (0);\n\n // Parse digits, up to (and including) a '.', 'E' or 'e'. Examples for each\n // limb in this if-else chain:\n // - \"0.789\"\n // - \"1002.789\"\n // - \".789\"\n // - Other (invalid input).\n uint32_t nd = 0;\n int32_t dp = 0;\n bool no_digits_before_separator = false;\n if ('0' == *p) {\n p++;\n for (;; p++) {\n if (p >= q) {\n goto after_all;\n } else if ((*p == '.') || (*p == ',')) {\n p++;\n goto after_sep;\n } else if ((*p == 'E') || (*p == 'e')) {\n p++;\n goto after_exp;\n } else if (*p != '_') {\n return wuffs_base__make_status(wuffs_base__error__bad_argument);\n }\n }\n\n } else if (('0' < *p) && (*p <= '9')) {\n h->digits[nd++] = (uint" +
+ "8_t)(*p - '0');\n dp = (int32_t)nd;\n p++;\n for (;; p++) {\n if (p >= q) {\n goto after_all;\n } else if (('0' <= *p) && (*p <= '9')) {\n if (nd < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {\n h->digits[nd++] = (uint8_t)(*p - '0');\n dp = (int32_t)nd;\n } else if ('0' != *p) {\n // Long-tail non-zeroes set the truncated bit.\n h->truncated = true;\n }\n } else if ((*p == '.') || (*p == ',')) {\n p++;\n goto after_sep;\n } else if ((*p == 'E') || (*p == 'e')) {\n p++;\n goto after_exp;\n } else if (*p != '_') {\n return wuffs_base__make_status(wuffs_base__error__bad_argument);\n }\n }\n\n } else if ((*p == '.') || (*p == ',')) {\n p++;\n no_digits_before_separator = true;\n\n } else {\n return wuffs_base__make_status(wuffs_base__error__bad_argument);\n }\n\nafter_sep:\n for (;; p++) {\n if (p >= q) {\n goto after_all;\n } else if ('0' == *p) {\n if (nd == 0) {\n " +
+ " // Track leading zeroes implicitly.\n dp--;\n } else if (nd <\n WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {\n h->digits[nd++] = (uint8_t)(*p - '0');\n }\n } else if (('0' < *p) && (*p <= '9')) {\n if (nd < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {\n h->digits[nd++] = (uint8_t)(*p - '0');\n } else {\n // Long-tail non-zeroes set the truncated bit.\n h->truncated = true;\n }\n } else if ((*p == 'E') || (*p == 'e')) {\n p++;\n goto after_exp;\n } else if (*p != '_') {\n return wuffs_base__make_status(wuffs_base__error__bad_argument);\n }\n }\n\nafter_exp:\n do {\n for (;; p++) {\n if (p >= q) {\n return wuffs_base__make_status(wuffs_base__error__bad_argument);\n } else if (*p != '_') {\n break;\n }\n }\n\n int32_t exp_sign = +1;\n if (*p == '+') {\n p++;\n } else if (*p == '-') {\n exp_sign = -1;\n p++;\n }\n\n int32_t exp = 0;\n const int3" +
+ "2_t exp_large =\n WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE +\n WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION;\n bool saw_exp_digits = false;\n for (; p < q; p++) {\n if (*p == '_') {\n // No-op.\n } else if (('0' <= *p) && (*p <= '9')) {\n saw_exp_digits = true;\n if (exp < exp_large) {\n exp = (10 * exp) + ((int32_t)(*p - '0'));\n }\n } else {\n break;\n }\n }\n if (!saw_exp_digits) {\n return wuffs_base__make_status(wuffs_base__error__bad_argument);\n }\n dp += exp_sign * exp;\n } while (0);\n\nafter_all:\n if (p != q) {\n return wuffs_base__make_status(wuffs_base__error__bad_argument);\n }\n h->num_digits = nd;\n if (nd == 0) {\n if (no_digits_before_separator) {\n return wuffs_base__make_status(wuffs_base__error__bad_argument);\n }\n h->decimal_point = 0;\n } else if (dp <\n -WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {\n h->decimal_point =\n -" +
+ "WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE - 1;\n } else if (dp >\n +WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {\n h->decimal_point =\n +WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE + 1;\n } else {\n h->decimal_point = dp;\n }\n wuffs_base__private_implementation__high_prec_dec__trim(h);\n return wuffs_base__make_status(NULL);\n}\n\n" +
"" +
"// --------\n\n// The etc__hpd_left_shift and etc__powers_of_5 tables were printed by\n// script/print-hpd-left-shift.go. That script has an optional -comments flag,\n// whose output is not copied here, which prints further detail.\n//\n// These tables are used in\n// wuffs_base__private_implementation__high_prec_dec__lshift_num_new_digits.\n\n// wuffs_base__private_implementation__hpd_left_shift[i] encodes the number of\n// new digits created after multiplying a positive integer by (1 << i): the\n// additional length in the decimal representation. For example, shifting \"234\"\n// by 3 (equivalent to multiplying by 8) will produce \"1872\". Going from a\n// 3-length string to a 4-length string means that 1 new digit was added (and\n// existing digits may have changed).\n//\n// Shifting by i can add either N or N-1 new digits, depending on whether the\n// original positive integer compares >= or < to the i'th power of 5 (as 10\n// equals 2 * 5). Comparison is lexicographic, not numerical.\n//\n// For example, shifting by 4 (i.e. mul" +
"tiplying by 16) can add 1 or 2 new\n// digits, depending on a lexicographic comparison to (5 ** 4), i.e. \"625\":\n// - (\"1\" << 4) is \"16\", which adds 1 new digit.\n// - (\"5678\" << 4) is \"90848\", which adds 1 new digit.\n// - (\"624\" << 4) is \"9984\", which adds 1 new digit.\n// - (\"62498\" << 4) is \"999968\", which adds 1 new digit.\n// - (\"625\" << 4) is \"10000\", which adds 2 new digits.\n// - (\"625001\" << 4) is \"10000016\", which adds 2 new digits.\n// - (\"7008\" << 4) is \"112128\", which adds 2 new digits.\n// - (\"99\" << 4) is \"1584\", which adds 2 new digits.\n//\n// Thus, when i is 4, N is 2 and (5 ** i) is \"625\". This etc__hpd_left_shift\n// array encodes this as:\n// - etc__hpd_left_shift[4] is 0x1006 = (2 << 11) | 0x0006.\n// - etc__hpd_left_shift[5] is 0x1009 = (? << 11) | 0x0009.\n// where the ? isn't relevant for i == 4.\n//\n// The high 5 bits of etc__hpd_left_shift[i] is N, the higher of the two\n// possible number of new digits. The low 11 bits are an offset into the\n//" +
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index ab39ffd..86f7417 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -9092,10 +9092,12 @@
uint8_t* p = s.ptr;
uint8_t* q = s.ptr + s.len;
- for (; (p < q) && (*p == '_'); p++) {
- }
- if (p >= q) {
- return wuffs_base__make_status(wuffs_base__error__bad_argument);
+ for (;; p++) {
+ if (p >= q) {
+ return wuffs_base__make_status(wuffs_base__error__bad_argument);
+ } else if (*p != '_') {
+ break;
+ }
}
// Parse sign.
@@ -9108,79 +9110,110 @@
} else {
break;
}
- for (; (p < q) && (*p == '_'); p++) {
+ for (;; p++) {
+ if (p >= q) {
+ return wuffs_base__make_status(wuffs_base__error__bad_argument);
+ } else if (*p != '_') {
+ break;
+ }
}
} while (0);
- // Parse digits.
+ // Parse digits, up to (and including) a '.', 'E' or 'e'. Examples for each
+ // limb in this if-else chain:
+ // - "0.789"
+ // - "1002.789"
+ // - ".789"
+ // - Other (invalid input).
uint32_t nd = 0;
int32_t dp = 0;
- bool saw_digits = false;
- bool saw_non_zero_digits = false;
- bool saw_dot = false;
- for (; p < q; p++) {
- if (*p == '_') {
- // No-op.
-
- } else if ((*p == '.') || (*p == ',')) {
- // As per https://en.wikipedia.org/wiki/Decimal_separator, both '.' or
- // ',' are commonly used. We just parse either, regardless of LOCALE.
- if (saw_dot) {
+ bool no_digits_before_separator = false;
+ if ('0' == *p) {
+ p++;
+ for (;; p++) {
+ if (p >= q) {
+ goto after_all;
+ } else if ((*p == '.') || (*p == ',')) {
+ p++;
+ goto after_sep;
+ } else if ((*p == 'E') || (*p == 'e')) {
+ p++;
+ goto after_exp;
+ } else if (*p != '_') {
return wuffs_base__make_status(wuffs_base__error__bad_argument);
}
- saw_dot = true;
- dp = (int32_t)nd;
+ }
+ } else if (('0' < *p) && (*p <= '9')) {
+ h->digits[nd++] = (uint8_t)(*p - '0');
+ dp = (int32_t)nd;
+ p++;
+ for (;; p++) {
+ if (p >= q) {
+ goto after_all;
+ } else if (('0' <= *p) && (*p <= '9')) {
+ if (nd < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
+ h->digits[nd++] = (uint8_t)(*p - '0');
+ dp = (int32_t)nd;
+ } else if ('0' != *p) {
+ // Long-tail non-zeroes set the truncated bit.
+ h->truncated = true;
+ }
+ } else if ((*p == '.') || (*p == ',')) {
+ p++;
+ goto after_sep;
+ } else if ((*p == 'E') || (*p == 'e')) {
+ p++;
+ goto after_exp;
+ } else if (*p != '_') {
+ return wuffs_base__make_status(wuffs_base__error__bad_argument);
+ }
+ }
+
+ } else if ((*p == '.') || (*p == ',')) {
+ p++;
+ no_digits_before_separator = true;
+
+ } else {
+ return wuffs_base__make_status(wuffs_base__error__bad_argument);
+ }
+
+after_sep:
+ for (;; p++) {
+ if (p >= q) {
+ goto after_all;
} else if ('0' == *p) {
- if (!saw_dot && !saw_non_zero_digits && saw_digits) {
- // We don't allow unnecessary leading zeroes: "000123" or "0644".
- return wuffs_base__make_status(wuffs_base__error__bad_argument);
- }
- saw_digits = true;
if (nd == 0) {
// Track leading zeroes implicitly.
dp--;
} else if (nd <
WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
- h->digits[nd++] = 0;
- } else {
- // Long-tail zeroes are ignored.
+ h->digits[nd++] = (uint8_t)(*p - '0');
}
-
} else if (('0' < *p) && (*p <= '9')) {
- if (!saw_dot && !saw_non_zero_digits && saw_digits) {
- // We don't allow unnecessary leading zeroes: "000123" or "0644".
- return wuffs_base__make_status(wuffs_base__error__bad_argument);
- }
- saw_digits = true;
- saw_non_zero_digits = true;
if (nd < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
h->digits[nd++] = (uint8_t)(*p - '0');
} else {
// Long-tail non-zeroes set the truncated bit.
h->truncated = true;
}
-
- } else {
- break;
- }
- }
-
- if (!saw_digits) {
- return wuffs_base__make_status(wuffs_base__error__bad_argument);
- }
- if (!saw_dot) {
- dp = (int32_t)nd;
- }
-
- // Parse exponent.
- if ((p < q) && ((*p == 'E') || (*p == 'e'))) {
- p++;
- for (; (p < q) && (*p == '_'); p++) {
- }
- if (p >= q) {
+ } else if ((*p == 'E') || (*p == 'e')) {
+ p++;
+ goto after_exp;
+ } else if (*p != '_') {
return wuffs_base__make_status(wuffs_base__error__bad_argument);
}
+ }
+
+after_exp:
+ do {
+ for (;; p++) {
+ if (p >= q) {
+ return wuffs_base__make_status(wuffs_base__error__bad_argument);
+ } else if (*p != '_') {
+ break;
+ }
+ }
int32_t exp_sign = +1;
if (*p == '+') {
@@ -9211,14 +9244,17 @@
return wuffs_base__make_status(wuffs_base__error__bad_argument);
}
dp += exp_sign * exp;
- }
+ } while (0);
- // Finish.
+after_all:
if (p != q) {
return wuffs_base__make_status(wuffs_base__error__bad_argument);
}
h->num_digits = nd;
if (nd == 0) {
+ if (no_digits_before_separator) {
+ return wuffs_base__make_status(wuffs_base__error__bad_argument);
+ }
h->decimal_point = 0;
} else if (dp <
-WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {