floatconv: optimize parse_number_f64 fallback
The time taken running script/manual-test-parse-number-f64.cc on the
parse-number-fxx-test-data suite falls from 3.565s to 2.491s (a 1.43x
speed-up; the new time is 0.70x the old, i.e. about 30% less time).
Fixes #113
diff --git a/doc/changelog.md b/doc/changelog.md
index ea15a32..acb9a10 100644
--- a/doc/changelog.md
+++ b/doc/changelog.md
@@ -1,6 +1,11 @@
# Changelog
+## Work In Progress
+
+The `wuffs_base__parse_number_f64` function has been further optimized.
+
+
## 2023-04-07 version 0.3.2
The `std/bmp` and `std/nie` image decoders' `decode_frame` method now allow
diff --git a/internal/cgen/base/floatconv-submodule-code.c b/internal/cgen/base/floatconv-submodule-code.c
index 7691182..ca10964 100644
--- a/internal/cgen/base/floatconv-submodule-code.c
+++ b/internal/cgen/base/floatconv-submodule-code.c
@@ -1304,24 +1304,41 @@
// approach taken in wuffs_base__parse_number_f64. The latter is optimized
// for the common cases (e.g. assuming no underscores or a leading '+'
// sign) rather than the full set of cases allowed by the Wuffs API.
- if (h->num_digits <= 19) {
+ //
+ // When we have 19 or fewer mantissa digits, run Eisel-Lemire once (trying
+ // for an exact result). When we have more than 19 mantissa digits, run it
+ // twice to get a lower and upper bound. We still have an exact result
+ // (within f64's rounding margin) if both bounds are equal (and valid).
+ uint32_t i_max = h->num_digits;
+ if (i_max > 19) {
+ i_max = 19;
+ }
+ int32_t exp10 = h->decimal_point - ((int32_t)i_max);
+ if ((-307 <= exp10) && (exp10 <= 288)) {
uint64_t man = 0;
uint32_t i;
- for (i = 0; i < h->num_digits; i++) {
+ for (i = 0; i < i_max; i++) {
man = (10 * man) + h->digits[i];
}
- int32_t exp10 = h->decimal_point - ((int32_t)(h->num_digits));
- if ((man != 0) && (-307 <= exp10) && (exp10 <= 288)) {
- int64_t r =
+ while (man != 0) { // The 'while' is just an 'if' that we can 'break'.
+ int64_t r0 =
wuffs_base__private_implementation__parse_number_f64_eisel_lemire(
- man, exp10);
- if (r >= 0) {
- wuffs_base__result_f64 ret;
- ret.status.repr = NULL;
- ret.value = wuffs_base__ieee_754_bit_representation__from_u64_to_f64(
- ((uint64_t)r) | (((uint64_t)(h->negative)) << 63));
- return ret;
+ man + 0, exp10);
+ if (r0 < 0) {
+ break;
+ } else if (h->num_digits > 19) {
+ int64_t r1 =
+ wuffs_base__private_implementation__parse_number_f64_eisel_lemire(
+ man + 1, exp10);
+ if (r1 != r0) {
+ break;
+ }
}
+ wuffs_base__result_f64 ret;
+ ret.status.repr = NULL;
+ ret.value = wuffs_base__ieee_754_bit_representation__from_u64_to_f64(
+ ((uint64_t)r0) | (((uint64_t)(h->negative)) << 63));
+ return ret;
}
}
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index d15b619..41edefa 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -14821,24 +14821,41 @@
// approach taken in wuffs_base__parse_number_f64. The latter is optimized
// for the common cases (e.g. assuming no underscores or a leading '+'
// sign) rather than the full set of cases allowed by the Wuffs API.
- if (h->num_digits <= 19) {
+ //
+ // When we have 19 or fewer mantissa digits, run Eisel-Lemire once (trying
+ // for an exact result). When we have more than 19 mantissa digits, run it
+ // twice to get a lower and upper bound. We still have an exact result
+ // (within f64's rounding margin) if both bounds are equal (and valid).
+ uint32_t i_max = h->num_digits;
+ if (i_max > 19) {
+ i_max = 19;
+ }
+ int32_t exp10 = h->decimal_point - ((int32_t)i_max);
+ if ((-307 <= exp10) && (exp10 <= 288)) {
uint64_t man = 0;
uint32_t i;
- for (i = 0; i < h->num_digits; i++) {
+ for (i = 0; i < i_max; i++) {
man = (10 * man) + h->digits[i];
}
- int32_t exp10 = h->decimal_point - ((int32_t)(h->num_digits));
- if ((man != 0) && (-307 <= exp10) && (exp10 <= 288)) {
- int64_t r =
+ while (man != 0) { // The 'while' is just an 'if' that we can 'break'.
+ int64_t r0 =
wuffs_base__private_implementation__parse_number_f64_eisel_lemire(
- man, exp10);
- if (r >= 0) {
- wuffs_base__result_f64 ret;
- ret.status.repr = NULL;
- ret.value = wuffs_base__ieee_754_bit_representation__from_u64_to_f64(
- ((uint64_t)r) | (((uint64_t)(h->negative)) << 63));
- return ret;
+ man + 0, exp10);
+ if (r0 < 0) {
+ break;
+ } else if (h->num_digits > 19) {
+ int64_t r1 =
+ wuffs_base__private_implementation__parse_number_f64_eisel_lemire(
+ man + 1, exp10);
+ if (r1 != r0) {
+ break;
+ }
}
+ wuffs_base__result_f64 ret;
+ ret.status.repr = NULL;
+ ret.value = wuffs_base__ieee_754_bit_representation__from_u64_to_f64(
+ ((uint64_t)r0) | (((uint64_t)(h->negative)) << 63));
+ return ret;
}
}
diff --git a/script/print-render-number-f64-tests.go b/script/print-render-number-f64-tests.go
index 792e2d0..ef47c1c 100644
--- a/script/print-render-number-f64-tests.go
+++ b/script/print-render-number-f64-tests.go
@@ -182,6 +182,7 @@
0x44B52D02C7E14AF6,
0x46293E5939A08CEA,
0x54B249AD2594C37D,
+ 0x54B2987670ADB613,
0x7BBA44DF832B8D46,
0x7BF06B0BB1FB384C,
0x7C2485CE9E7A065F,
diff --git a/test/c/std/json.c b/test/c/std/json.c
index 420118b..f82bc61 100644
--- a/test/c/std/json.c
+++ b/test/c/std/json.c
@@ -1026,6 +1026,7 @@
{.want = 0x46293E5939A08CEA, .str = "1e30"},
{.want = 0x54B249AD2594C37D, .str = "+1E+100"},
{.want = 0x54B249AD2594C37D, .str = "+_1_E_+_1_0_0_"},
+ {.want = 0x54B2987670ADB613, .str = "1.0168286519992372611942638e+100"},
{.want = 0x7BBA44DF832B8D46, .str = "1e+288"},
{.want = 0x7BF06B0BB1FB384C, .str = "1e+289"},
{.want = 0x7C2485CE9E7A065F, .str = "1e+290"},
@@ -1944,6 +1945,19 @@
.want_4g = "1e+100",
},
{
+ .x = 0x54B2987670ADB613,
+ .want__e = "1.0168286519992373e+100",
+ .want__f = "10168286519992373000000000000000000000000000000000"
+ "00000000000000000000000000000000000000000000000000"
+ "0",
+ .want_0g = "1e+100",
+ .want_2e = "1.02e+100",
+ .want_3f = "10168286519992372611942638135241625267907715220891"
+ "31363562982164291689352904657807934098133246961254"
+ "4.000",
+ .want_4g = "1.017e+100",
+ },
+ {
.x = 0x7BBA44DF832B8D46,
.want__e = "1e+288",
.want__f = "10000000000000000000000000000000000000000000000000"