Optimize high_prec_dec__round_just_enough
name old time/op new time/op delta
wuffs_strconv_render_number_f64_just_enough_fractions/clang5 56.4µs ± 1% 56.3µs ± 1% ~ (p=1.000 n=5+5)
wuffs_strconv_render_number_f64_just_enough_small_integers/clang5 4.12µs ± 2% 1.00µs ± 1% -75.65% (p=0.008 n=5+5)
wuffs_strconv_render_number_f64_just_enough_fractions/gcc7 54.3µs ± 0% 54.2µs ± 0% ~ (p=0.151 n=5+5)
wuffs_strconv_render_number_f64_just_enough_small_integers/gcc7 4.16µs ± 0% 1.01µs ± 0% -75.77% (p=0.000 n=4+5)
diff --git a/internal/cgen/base/f64conv-submodule.c b/internal/cgen/base/f64conv-submodule.c
index 5e48340..e82ccba 100644
--- a/internal/cgen/base/f64conv-submodule.c
+++ b/internal/cgen/base/f64conv-submodule.c
@@ -727,8 +727,9 @@
// Let f be the floating point number represented by exp2 and mantissa (and
// also the number in h): the number (mantissa * (2 ** (exp2 - 52))).
//
- // If f is zero, we can return early.
- if (mantissa == 0) {
+ // If f is zero or a small integer, we can return early.
+ if ((mantissa == 0) ||
+ ((exp2 < 53) && (h->decimal_point >= ((int32_t)(h->num_digits))))) {
return;
}
diff --git a/internal/cgen/data/data.go b/internal/cgen/data/data.go
index 14335a7..b70c85d 100644
--- a/internal/cgen/data/data.go
+++ b/internal/cgen/data/data.go
@@ -59,13 +59,13 @@
"" +
"// --------\n\n// wuffs_base__private_implementation__high_prec_dec__round_etc rounds h's\n// number. For those functions that take an n argument, rounding produces at\n// most n digits (which is not necessarily at most n decimal places). Negative\n// n values are ignored, as well as any n greater than or equal to h's number\n// of digits. The etc__round_just_enough function implicitly chooses an n to\n// implement WUFFS_BASE__RENDER_NUMBER_FXX__JUST_ENOUGH_PRECISION.\n//\n// Preconditions:\n// - h is non-NULL.\n// - h->decimal_point is \"not extreme\".\n//\n// \"Not extreme\" means within\n// ±WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE.\n\nstatic void //\nwuffs_base__private_implementation__high_prec_dec__round_down(\n wuffs_base__private_implementation__high_prec_dec* h,\n int32_t n) {\n if ((n < 0) || (h->num_digits <= (uint32_t)n)) {\n return;\n }\n h->num_digits = (uint32_t)(n);\n wuffs_base__private_implementation__high_prec_dec__trim(h);\n}\n\nstatic void //\nwuffs_base__private_implementation__hi" +
"gh_prec_dec__round_up(\n wuffs_base__private_implementation__high_prec_dec* h,\n int32_t n) {\n if ((n < 0) || (h->num_digits <= (uint32_t)n)) {\n return;\n }\n\n for (n--; n >= 0; n--) {\n if (h->digits[n] < 9) {\n h->digits[n]++;\n h->num_digits = (uint32_t)(n + 1);\n return;\n }\n }\n\n // The number is all 9s. Change to a single 1 and adjust the decimal point.\n h->digits[0] = 1;\n h->num_digits = 1;\n h->decimal_point++;\n}\n\nstatic void //\nwuffs_base__private_implementation__high_prec_dec__round_nearest(\n wuffs_base__private_implementation__high_prec_dec* h,\n int32_t n) {\n if ((n < 0) || (h->num_digits <= (uint32_t)n)) {\n return;\n }\n bool up = h->digits[n] >= 5;\n if ((h->digits[n] == 5) && ((n + 1) == ((int32_t)(h->num_digits)))) {\n up = h->truncated || //\n ((n > 0) && ((h->digits[n - 1] & 1) != 0));\n }\n\n if (up) {\n wuffs_base__private_implementation__high_prec_dec__round_up(h, n);\n } else {\n wuffs_base__private_implementation__high_prec_dec__round_do" +
- "wn(h, n);\n }\n}\n\nstatic void //\nwuffs_base__private_implementation__high_prec_dec__round_just_enough(\n wuffs_base__private_implementation__high_prec_dec* h,\n int32_t exp2,\n uint64_t mantissa) {\n // The magic numbers 52 and 53 in this function are because IEEE 754 double\n // precision has 52 mantissa bits.\n //\n // Let f be the floating point number represented by exp2 and mantissa (and\n // also the number in h): the number (mantissa * (2 ** (exp2 - 52))).\n //\n // If f is zero, we can return early.\n if (mantissa == 0) {\n return;\n }\n\n // The smallest normal f has an exp2 of -1022 and a mantissa of (1 << 52).\n // Subnormal numbers have the same exp2 but a smaller mantissa.\n static const int32_t min_incl_normal_exp2 = -1022;\n static const uint64_t min_incl_normal_mantissa = 0x0010000000000000ul;\n\n // Compute lower and upper bounds such that any number between them (possibly\n // inclusive) will round to f. First, the lower bound. Our number f is:\n // ((mantissa + 0) * (2 ** ( " +
- " exp2 - 52)))\n //\n // The next lowest floating point number is:\n // ((mantissa - 1) * (2 ** ( exp2 - 52)))\n // unless (mantissa - 1) drops the (1 << 52) bit and exp2 is not the\n // min_incl_normal_exp2. Either way, call it:\n // ((l_mantissa) * (2 ** (l_exp2 - 52)))\n //\n // The lower bound is halfway between them (noting that 52 became 53):\n // (((2 * l_mantissa) + 1) * (2 ** (l_exp2 - 53)))\n int32_t l_exp2 = exp2;\n uint64_t l_mantissa = mantissa - 1;\n if ((exp2 > min_incl_normal_exp2) && (mantissa <= min_incl_normal_mantissa)) {\n l_exp2 = exp2 - 1;\n l_mantissa = (2 * mantissa) - 1;\n }\n wuffs_base__private_implementation__high_prec_dec lower;\n wuffs_base__private_implementation__high_prec_dec__assign(\n &lower, (2 * l_mantissa) + 1, false);\n wuffs_base__private_implementation__high_prec_dec__lshift(&lower,\n l_exp2 - 53);\n\n // Next, the upper bound. Our number f is:\n // ((mantissa + 0) * (2 **" +
- " (exp2 - 52)))\n //\n // The next highest floating point number is:\n // ((mantissa + 1) * (2 ** (exp2 - 52)))\n //\n // The upper bound is halfway between them (noting that 52 became 53):\n // (((2 * mantissa) + 1) * (2 ** (exp2 - 53)))\n wuffs_base__private_implementation__high_prec_dec upper;\n wuffs_base__private_implementation__high_prec_dec__assign(\n &upper, (2 * mantissa) + 1, false);\n wuffs_base__private_implementation__high_prec_dec__lshift(&upper, exp2 - 53);\n\n // The lower and upper bounds are possible outputs only if the original\n // mantissa is even, so that IEEE round-to-even would round to the original\n // mantissa and not its neighbors.\n bool inclusive = (mantissa & 1) == 0;\n\n // As we walk the digits, we want to know whether rounding up would fall\n // within the upper bound. This is tracked by upper_delta:\n // - When -1, the digits of h and upper are the same so far.\n // - When +0, we saw a difference of 1 between h and upper on a previous\n // digit and subsequen" +
- "tly only 9s for h and 0s for upper. Thus, rounding\n // up may fall outside of the bound if !inclusive.\n // - When +1, the difference is greater than 1 and we know that rounding up\n // falls within the bound.\n //\n // This is a state machine with three states. The numerical value for each\n // state (-1, +0 or +1) isn't important, other than their order.\n int upper_delta = -1;\n\n // We can now figure out the shortest number of digits required. Walk the\n // digits until h has distinguished itself from lower or upper.\n //\n // The zi and zd variables are indexes and digits, for z in l (lower), h (the\n // number) and u (upper).\n //\n // The lower, h and upper numbers may have their decimal points at different\n // places. In this case, upper is the longest, so we iterate ui starting from\n // 0 and iterate li and hi starting from either 0 or -1.\n int32_t ui = 0;\n for (;; ui++) {\n // Calculate hd, the middle number's digit.\n int32_t hi = ui - upper.decimal_point + h->decimal_point;\n if (" +
- "hi >= ((int32_t)(h->num_digits))) {\n break;\n }\n uint8_t hd = (((uint32_t)hi) < h->num_digits) ? h->digits[hi] : 0;\n\n // Calculate ld, the lower bound's digit.\n int32_t li = ui - upper.decimal_point + lower.decimal_point;\n uint8_t ld = (((uint32_t)li) < lower.num_digits) ? lower.digits[li] : 0;\n\n // We can round down (truncate) if lower has a different digit than h or if\n // lower is inclusive and is exactly the result of rounding down (i.e. we\n // have reached the final digit of lower).\n bool can_round_down =\n (ld != hd) || //\n (inclusive && ((li + 1) == ((int32_t)(lower.num_digits))));\n\n // Calculate ud, the upper bound's digit, and update upper_delta.\n uint8_t ud = (((uint32_t)ui) < upper.num_digits) ? upper.digits[ui] : 0;\n if (upper_delta < 0) {\n if ((hd + 1) < ud) {\n // For example:\n // h = 12345???\n // upper = 12347???\n upper_delta = +1;\n } else if (hd != ud) {\n // For example:\n // h = 123" +
- "45???\n // upper = 12346???\n upper_delta = +0;\n }\n } else if (upper_delta == 0) {\n if ((hd != 9) || (ud != 0)) {\n // For example:\n // h = 1234598?\n // upper = 1234600?\n upper_delta = +1;\n }\n }\n\n // We can round up if upper has a different digit than h and either upper\n // is inclusive or upper is bigger than the result of rounding up.\n bool can_round_up =\n (upper_delta > 0) || //\n ((upper_delta == 0) && //\n (inclusive || ((ui + 1) < ((int32_t)(upper.num_digits)))));\n\n // If we can round either way, round to nearest. If we can round only one\n // way, do it. If we can't round, continue the loop.\n if (can_round_down) {\n if (can_round_up) {\n wuffs_base__private_implementation__high_prec_dec__round_nearest(\n h, hi + 1);\n return;\n } else {\n wuffs_base__private_implementation__high_prec_dec__round_down(h,\n " +
- " hi + 1);\n return;\n }\n } else {\n if (can_round_up) {\n wuffs_base__private_implementation__high_prec_dec__round_up(h, hi + 1);\n return;\n }\n }\n }\n}\n\n" +
+ "wn(h, n);\n }\n}\n\nstatic void //\nwuffs_base__private_implementation__high_prec_dec__round_just_enough(\n wuffs_base__private_implementation__high_prec_dec* h,\n int32_t exp2,\n uint64_t mantissa) {\n // The magic numbers 52 and 53 in this function are because IEEE 754 double\n // precision has 52 mantissa bits.\n //\n // Let f be the floating point number represented by exp2 and mantissa (and\n // also the number in h): the number (mantissa * (2 ** (exp2 - 52))).\n //\n // If f is zero or a small integer, we can return early.\n if ((mantissa == 0) ||\n ((exp2 < 53) && (h->decimal_point >= ((int32_t)(h->num_digits))))) {\n return;\n }\n\n // The smallest normal f has an exp2 of -1022 and a mantissa of (1 << 52).\n // Subnormal numbers have the same exp2 but a smaller mantissa.\n static const int32_t min_incl_normal_exp2 = -1022;\n static const uint64_t min_incl_normal_mantissa = 0x0010000000000000ul;\n\n // Compute lower and upper bounds such that any number between them (possibly\n // inclusive) wil" +
+ "l round to f. First, the lower bound. Our number f is:\n // ((mantissa + 0) * (2 ** ( exp2 - 52)))\n //\n // The next lowest floating point number is:\n // ((mantissa - 1) * (2 ** ( exp2 - 52)))\n // unless (mantissa - 1) drops the (1 << 52) bit and exp2 is not the\n // min_incl_normal_exp2. Either way, call it:\n // ((l_mantissa) * (2 ** (l_exp2 - 52)))\n //\n // The lower bound is halfway between them (noting that 52 became 53):\n // (((2 * l_mantissa) + 1) * (2 ** (l_exp2 - 53)))\n int32_t l_exp2 = exp2;\n uint64_t l_mantissa = mantissa - 1;\n if ((exp2 > min_incl_normal_exp2) && (mantissa <= min_incl_normal_mantissa)) {\n l_exp2 = exp2 - 1;\n l_mantissa = (2 * mantissa) - 1;\n }\n wuffs_base__private_implementation__high_prec_dec lower;\n wuffs_base__private_implementation__high_prec_dec__assign(\n &lower, (2 * l_mantissa) + 1, false);\n wuffs_base__private_implementation__high_prec_dec__lshift(&lower,\n " +
+ "l_exp2 - 53);\n\n // Next, the upper bound. Our number f is:\n // ((mantissa + 0) * (2 ** (exp2 - 52)))\n //\n // The next highest floating point number is:\n // ((mantissa + 1) * (2 ** (exp2 - 52)))\n //\n // The upper bound is halfway between them (noting that 52 became 53):\n // (((2 * mantissa) + 1) * (2 ** (exp2 - 53)))\n wuffs_base__private_implementation__high_prec_dec upper;\n wuffs_base__private_implementation__high_prec_dec__assign(\n &upper, (2 * mantissa) + 1, false);\n wuffs_base__private_implementation__high_prec_dec__lshift(&upper, exp2 - 53);\n\n // The lower and upper bounds are possible outputs only if the original\n // mantissa is even, so that IEEE round-to-even would round to the original\n // mantissa and not its neighbors.\n bool inclusive = (mantissa & 1) == 0;\n\n // As we walk the digits, we want to know whether rounding up would fall\n // within the upper bound. This is tracked by upper_delta:\n // - When -1, the digits of h and upper are the same so far.\n // -" +
+ " When +0, we saw a difference of 1 between h and upper on a previous\n // digit and subsequently only 9s for h and 0s for upper. Thus, rounding\n // up may fall outside of the bound if !inclusive.\n // - When +1, the difference is greater than 1 and we know that rounding up\n // falls within the bound.\n //\n // This is a state machine with three states. The numerical value for each\n // state (-1, +0 or +1) isn't important, other than their order.\n int upper_delta = -1;\n\n // We can now figure out the shortest number of digits required. Walk the\n // digits until h has distinguished itself from lower or upper.\n //\n // The zi and zd variables are indexes and digits, for z in l (lower), h (the\n // number) and u (upper).\n //\n // The lower, h and upper numbers may have their decimal points at different\n // places. In this case, upper is the longest, so we iterate ui starting from\n // 0 and iterate li and hi starting from either 0 or -1.\n int32_t ui = 0;\n for (;; ui++) {\n // Calculate hd, t" +
+ "he middle number's digit.\n int32_t hi = ui - upper.decimal_point + h->decimal_point;\n if (hi >= ((int32_t)(h->num_digits))) {\n break;\n }\n uint8_t hd = (((uint32_t)hi) < h->num_digits) ? h->digits[hi] : 0;\n\n // Calculate ld, the lower bound's digit.\n int32_t li = ui - upper.decimal_point + lower.decimal_point;\n uint8_t ld = (((uint32_t)li) < lower.num_digits) ? lower.digits[li] : 0;\n\n // We can round down (truncate) if lower has a different digit than h or if\n // lower is inclusive and is exactly the result of rounding down (i.e. we\n // have reached the final digit of lower).\n bool can_round_down =\n (ld != hd) || //\n (inclusive && ((li + 1) == ((int32_t)(lower.num_digits))));\n\n // Calculate ud, the upper bound's digit, and update upper_delta.\n uint8_t ud = (((uint32_t)ui) < upper.num_digits) ? upper.digits[ui] : 0;\n if (upper_delta < 0) {\n if ((hd + 1) < ud) {\n // For example:\n // h = 12345???\n // upper = 12347???\n " +
+ " upper_delta = +1;\n } else if (hd != ud) {\n // For example:\n // h = 12345???\n // upper = 12346???\n upper_delta = +0;\n }\n } else if (upper_delta == 0) {\n if ((hd != 9) || (ud != 0)) {\n // For example:\n // h = 1234598?\n // upper = 1234600?\n upper_delta = +1;\n }\n }\n\n // We can round up if upper has a different digit than h and either upper\n // is inclusive or upper is bigger than the result of rounding up.\n bool can_round_up =\n (upper_delta > 0) || //\n ((upper_delta == 0) && //\n (inclusive || ((ui + 1) < ((int32_t)(upper.num_digits)))));\n\n // If we can round either way, round to nearest. If we can round only one\n // way, do it. If we can't round, continue the loop.\n if (can_round_down) {\n if (can_round_up) {\n wuffs_base__private_implementation__high_prec_dec__round_nearest(\n h, hi + 1);\n return;\n } else {\n wuffs_base__private_implementat" +
+ "ion__high_prec_dec__round_down(h,\n hi + 1);\n return;\n }\n } else {\n if (can_round_up) {\n wuffs_base__private_implementation__high_prec_dec__round_up(h, hi + 1);\n return;\n }\n }\n }\n}\n\n" +
"" +
"// --------\n\n// The wuffs_base__private_implementation__etc_powers_of_10 tables were printed\n// by script/print-mpb-powers-of-10.go. That script has an optional -comments\n// flag, whose output is not copied here, which prints further detail.\n//\n// These tables are used in\n// wuffs_base__private_implementation__medium_prec_bin__assign_from_hpd.\n\n// wuffs_base__private_implementation__big_powers_of_10 contains approximations\n// to the powers of 10, ranging from 1e-348 to 1e+340, with the exponent\n// stepping by 8: -348, -340, -332, ..., -12, -4, +4, +12, ..., +340. Each step\n// consists of three uint32_t elements. There are 87 triples, 87 * 3 = 261.\n//\n// For example, the third approximation, for 1e-332, consists of the uint32_t\n// triple (0x3055AC76, 0x8B16FB20, 0xFFFFFB72). The first two of that triple\n// are a little-endian uint64_t value: 0x8B16FB203055AC76. The last one is an\n// int32_t value: -1166. Together, they represent the approximation:\n// 1e-332 ≈ 0x8B16FB203055AC76 * (2 ** -1166)\n// Similarly," +
" the (0x00000000, 0x9C400000, 0xFFFFFFCE) uint32_t triple means:\n// 1e+4 ≈ 0x9C40000000000000 * (2 ** -50) // This approx'n is exact.\n// Similarly, the (0xD4C4FB27, 0xED63A231, 0x000000A2) uint32_t triple means:\n// 1e+68 ≈ 0xED63A231D4C4FB27 * (2 ** 162)\nstatic const uint32_t\n wuffs_base__private_implementation__big_powers_of_10[261] = {\n 0x081C0288, 0xFA8FD5A0, 0xFFFFFB3C, 0xA23EBF76, 0xBAAEE17F, 0xFFFFFB57,\n 0x3055AC76, 0x8B16FB20, 0xFFFFFB72, 0x5DCE35EA, 0xCF42894A, 0xFFFFFB8C,\n 0x55653B2D, 0x9A6BB0AA, 0xFFFFFBA7, 0x3D1A45DF, 0xE61ACF03, 0xFFFFFBC1,\n 0xC79AC6CA, 0xAB70FE17, 0xFFFFFBDC, 0xBEBCDC4F, 0xFF77B1FC, 0xFFFFFBF6,\n 0x416BD60C, 0xBE5691EF, 0xFFFFFC11, 0x907FFC3C, 0x8DD01FAD, 0xFFFFFC2C,\n 0x31559A83, 0xD3515C28, 0xFFFFFC46, 0xADA6C9B5, 0x9D71AC8F, 0xFFFFFC61,\n 0x23EE8BCB, 0xEA9C2277, 0xFFFFFC7B, 0x4078536D, 0xAECC4991, 0xFFFFFC96,\n 0x5DB6CE57, 0x823C1279, 0xFFFFFCB1, 0x4DFB5637, 0xC2109436, 0xFFFFFCCB,\n 0x3848984F, 0x909" +
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 9032742..24a88d0 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -9673,8 +9673,9 @@
// Let f be the floating point number represented by exp2 and mantissa (and
// also the number in h): the number (mantissa * (2 ** (exp2 - 52))).
//
- // If f is zero, we can return early.
- if (mantissa == 0) {
+ // If f is zero or a small integer, we can return early.
+ if ((mantissa == 0) ||
+ ((exp2 < 53) && (h->decimal_point >= ((int32_t)(h->num_digits))))) {
return;
}
diff --git a/script/print-render-number-f64-tests.go b/script/print-render-number-f64-tests.go
index b4aec2b..df0a548 100644
--- a/script/print-render-number-f64-tests.go
+++ b/script/print-render-number-f64-tests.go
@@ -160,6 +160,8 @@
0x4330000000000000,
0x4330000000000001,
0x4330000000000002,
+ 0x433FFFFFFFFFFFFE,
+ 0x433FFFFFFFFFFFFF,
0x4340000000000000,
0x4340000000000001,
0x4340000000000002,
diff --git a/test/c/std/json.c b/test/c/std/json.c
index 5924dd4..56740ae 100644
--- a/test/c/std/json.c
+++ b/test/c/std/json.c
@@ -659,6 +659,8 @@
{.want = 0x4330000000000001, .str = "4503599627370497"},
{.want = 0x4330000000000002, .str = "4503599627370497.5"},
{.want = 0x4330000000000002, .str = "4503599627370498"},
+ {.want = 0x433FFFFFFFFFFFFE, .str = "9007199254740990"},
+ {.want = 0x433FFFFFFFFFFFFF, .str = "9.007199254740991e+15"},
{.want = 0x4340000000000000, .str = "9007199254740992"}, // 1 << 53.
{.want = 0x4340000000000000, .str = "9007199254740993"},
{.want = 0x4340000000000001, .str = "9007199254740994"},
@@ -1275,6 +1277,24 @@
.want_4g = "4.504e+15",
},
{
+ .x = 0x433FFFFFFFFFFFFE,
+ .want__e = "9.00719925474099e+15",
+ .want__f = "9007199254740990",
+ .want_0g = "9e+15",
+ .want_2e = "9.01e+15",
+ .want_3f = "9007199254740990.000",
+ .want_4g = "9.007e+15",
+ },
+ {
+ .x = 0x433FFFFFFFFFFFFF,
+ .want__e = "9.007199254740991e+15",
+ .want__f = "9007199254740991",
+ .want_0g = "9e+15",
+ .want_2e = "9.01e+15",
+ .want_3f = "9007199254740991.000",
+ .want_4g = "9.007e+15",
+ },
+ {
.x = 0x4340000000000000,
.want__e = "9.007199254740992e+15",
.want__f = "9007199254740992",