floatconv: tweak Simple Decimal Conversion This tweaks the code and comments added in commit 36d7748e "Merge pull request #90 from sarastro-nl/improvement/optimize-SDC" and commit d46220ca "optimize SDC". Both of those two commit messages are short but see the discussion re algorithmic changes on PR 87 (yes, 87, not 90): https://github.com/google/wuffs/pull/87 Also run clang-format. Also update doc/changelog.md Compared to before commit d46220ca "optimize SDC", when running script/manual-test-parse-number-f64.cc and disabling both the etc__parse_number_f64_eisel_lemire function and the "If both man and (10 ** exp10) are exactly representable by a double" fast path, the tests still pass but the number of etc__high_prec_dec__small_lshift and etc__high_prec_dec__small_rshift calls drop: Function Before After Ratio lshift 31174584 25762258 82.64% = 100% - 17.36% rshift 21592604 20982502 97.17% = 100% - 2.83% The time taken drops from 15.984s to 13.710s (85.77% = 100% - 14.23%). To repeat, these are the improvements after disabling the Eisel-Lemire and other fast paths. Simple Decimal Conversion is the rarely invoked fallback parse_number_f64 algorithm. The practical improvement (with Eisel-Lemire etc enabled) is smaller.
diff --git a/doc/changelog.md b/doc/changelog.md index fbb05c2..462fd06 100644 --- a/doc/changelog.md +++ b/doc/changelog.md
@@ -7,6 +7,9 @@ input"` instead of `"$short read"`. Importantly, this is an error, not a suspension. +The `wuffs_base__parse_number_f64` function's Simple Decimal Conversion +fallback algorithm has been optimized. + ## 2023-01-26 version 0.3.0
diff --git a/internal/cgen/base/floatconv-submodule-code.c b/internal/cgen/base/floatconv-submodule-code.c index 33fc546..7691182 100644 --- a/internal/cgen/base/floatconv-submodule-code.c +++ b/internal/cgen/base/floatconv-submodule-code.c
@@ -1264,6 +1264,27 @@ // powers converts decimal powers of 10 to binary powers of 2. For example, // (10000 >> 13) is 1. It stops before the elements exceed 60, also known // as WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL. + // + // This rounds down (1<<13 is a lower bound for 1e4). Adding 1 to the array + // element value rounds up (1<<14 is an upper bound for 1e4) while staying + // at or below WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL. + // + // When starting in the range [1e+1 .. 1e+2] (i.e. h->decimal_point == +2), + // powers[2] == 6 and so: + // - Right shifting by 6+0 produces the range [10/64 .. 100/64] = + // [0.156250 .. 1.56250]. The resultant h->decimal_point is +0 or +1. + // - Right shifting by 6+1 produces the range [10/128 .. 100/128] = + // [0.078125 .. 0.78125]. The resultant h->decimal_point is -1 or -0. + // + // When starting in the range [1e-3 .. 1e-2] (i.e. h->decimal_point == -2), + // powers[2] == 6 and so: + // - Left shifting by 6+0 produces the range [0.001*64 .. 0.01*64] = + // [0.064 .. 0.64]. The resultant h->decimal_point is -1 or -0. + // - Left shifting by 6+1 produces the range [0.001*128 .. 0.01*128] = + // [0.128 .. 1.28]. The resultant h->decimal_point is +0 or +1. + // + // Thus, when targeting h->decimal_point being +0 or +1, use (powers[n]+0) + // when right shifting but (powers[n]+1) when left shifting. static const uint32_t num_powers = 19; static const uint8_t powers[19] = { 0, 3, 6, 9, 13, 16, 19, 23, 26, 29, // @@ -1307,9 +1328,10 @@ // When Eisel-Lemire fails, fall back to Simple Decimal Conversion. See // https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html // - // Scale by powers of 2 until we're in the range [1 .. 2]. First we shift - // right, possibly a little too far, ending with a value certainly below - // 10 ... + // Scale by powers of 2 until we're in the range [0.1 .. 10]. Equivalently, + // that h->decimal_point is +0 or +1. + // + // First we shift right while at or above 10... const int32_t f64_bias = -1023; int32_t exp2 = 0; while (h->decimal_point > 1) { @@ -1326,13 +1348,15 @@ } exp2 += (int32_t)shift; } - // ... then we shift left, putting us in [0.1 .. 10]. + // ...then we shift left while below 0.1. while (h->decimal_point < 0) { uint32_t shift; - uint32_t n = (uint32_t)(-h->decimal_point); - shift = (n < num_powers) - ? powers[n] + 1 // shift 1 extra since we're now multiplying - : WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL; + uint32_t n = (uint32_t)(-h->decimal_point); + shift = (n < num_powers) + // The +1 is per "when targeting h->decimal_point being +0 or + // +1... when left shifting" in the powers comment above. + ? (powers[n] + 1) + : WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL; wuffs_base__private_implementation__high_prec_dec__small_lshift(h, shift); if (h->decimal_point > @@ -1342,40 +1366,38 @@ exp2 -= (int32_t)shift; } - // To get into the range [1 .. 2] we need to look at the first 3 digits of - // the mantisse to determine the final left shift. If we are already between 1 - // and 2 then just shift left by 52. - uint64_t man = 0; - uint32_t i; - for (i = 0; i < 3; i++) { - man = (10 * man) + ((i < h->num_digits) ? h->digits[i] : 0); + // To get from "in the range [0.1 .. 10]" to "in the range [1 .. 2]" (which + // will give us our exponent in base-2), the mantissa's first 3 digits will + // determine the final left shift, equal to 52 (the number of explicit f64 + // bits) plus an additional adjustment. + int man3 = (100 * h->digits[0]) + + ((h->num_digits > 1) ? (10 * h->digits[1]) : 0) + + ((h->num_digits > 2) ? h->digits[2] : 0); + int32_t additional_lshift = 0; + if (h->decimal_point == 0) { // The value is in [0.1 .. 1]. + if (man3 < 125) { + additional_lshift = +4; + } else if (man3 < 250) { + additional_lshift = +3; + } else if (man3 < 500) { + additional_lshift = +2; + } else { + additional_lshift = +1; + } + } else { // The value is in [1 .. 10]. + if (man3 < 200) { + additional_lshift = -0; + } else if (man3 < 400) { + additional_lshift = -1; + } else if (man3 < 800) { + additional_lshift = -2; + } else { + additional_lshift = -3; + } } - int32_t final_lshift = 52; - int32_t additional_shift = 0; - if (h->decimal_point == 0) { // value is in [0.1 .. 1] - if (man < 125) { - additional_shift = -4; - } else if (man < 250) { - additional_shift = -3; - } else if (man < 500) { - additional_shift = -2; - } else { - additional_shift = -1; - } - } else { // value is in [1 .. 10] - if (man < 200) { - additional_shift = 0; - } else if (man < 400) { - additional_shift = 1; - } else if (man < 800) { - additional_shift = 2; - } else { - additional_shift = 3; - } - } - exp2 += additional_shift; - final_lshift -= additional_shift; - + exp2 -= additional_lshift; + uint32_t final_lshift = (uint32_t)(52 + additional_lshift); + // The minimum normal exponent is (f64_bias + 1). while ((f64_bias + 1) > exp2) { uint32_t n = (uint32_t)((f64_bias + 1) - exp2); @@ -1392,7 +1414,8 @@ } // Extract 53 bits for the mantissa (in base-2). - wuffs_base__private_implementation__high_prec_dec__small_lshift(h, final_lshift); + wuffs_base__private_implementation__high_prec_dec__small_lshift( + h, final_lshift); uint64_t man2 = wuffs_base__private_implementation__high_prec_dec__rounded_integer(h);
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c index 828d6e5..400895e 100644 --- a/release/c/wuffs-unsupported-snapshot.c +++ b/release/c/wuffs-unsupported-snapshot.c
@@ -14781,6 +14781,27 @@ // powers converts decimal powers of 10 to binary powers of 2. For example, // (10000 >> 13) is 1. It stops before the elements exceed 60, also known // as WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL. + // + // This rounds down (1<<13 is a lower bound for 1e4). Adding 1 to the array + // element value rounds up (1<<14 is an upper bound for 1e4) while staying + // at or below WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL. + // + // When starting in the range [1e+1 .. 1e+2] (i.e. h->decimal_point == +2), + // powers[2] == 6 and so: + // - Right shifting by 6+0 produces the range [10/64 .. 100/64] = + // [0.156250 .. 1.56250]. The resultant h->decimal_point is +0 or +1. + // - Right shifting by 6+1 produces the range [10/128 .. 100/128] = + // [0.078125 .. 0.78125]. The resultant h->decimal_point is -1 or -0. + // + // When starting in the range [1e-3 .. 1e-2] (i.e. h->decimal_point == -2), + // powers[2] == 6 and so: + // - Left shifting by 6+0 produces the range [0.001*64 .. 0.01*64] = + // [0.064 .. 0.64]. The resultant h->decimal_point is -1 or -0. + // - Left shifting by 6+1 produces the range [0.001*128 .. 0.01*128] = + // [0.128 .. 1.28]. The resultant h->decimal_point is +0 or +1. + // + // Thus, when targeting h->decimal_point being +0 or +1, use (powers[n]+0) + // when right shifting but (powers[n]+1) when left shifting. static const uint32_t num_powers = 19; static const uint8_t powers[19] = { 0, 3, 6, 9, 13, 16, 19, 23, 26, 29, // @@ -14824,9 +14845,10 @@ // When Eisel-Lemire fails, fall back to Simple Decimal Conversion. See // https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html // - // Scale by powers of 2 until we're in the range [1 .. 2]. First we shift - // right, possibly a little too far, ending with a value certainly below - // 10 ... + // Scale by powers of 2 until we're in the range [0.1 .. 10]. Equivalently, + // that h->decimal_point is +0 or +1. + // + // First we shift right while at or above 10... const int32_t f64_bias = -1023; int32_t exp2 = 0; while (h->decimal_point > 1) { @@ -14843,13 +14865,15 @@ } exp2 += (int32_t)shift; } - // ... then we shift left, putting us in [0.1 .. 10]. + // ...then we shift left while below 0.1. while (h->decimal_point < 0) { uint32_t shift; - uint32_t n = (uint32_t)(-h->decimal_point); - shift = (n < num_powers) - ? powers[n] + 1 // shift 1 extra since we're now multiplying - : WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL; + uint32_t n = (uint32_t)(-h->decimal_point); + shift = (n < num_powers) + // The +1 is per "when targeting h->decimal_point being +0 or + // +1... when left shifting" in the powers comment above. + ? (powers[n] + 1) + : WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL; wuffs_base__private_implementation__high_prec_dec__small_lshift(h, shift); if (h->decimal_point > @@ -14859,40 +14883,38 @@ exp2 -= (int32_t)shift; } - // To get into the range [1 .. 2] we need to look at the first 3 digits of - // the mantisse to determine the final left shift. If we are already between 1 - // and 2 then just shift left by 52. - uint64_t man = 0; - uint32_t i; - for (i = 0; i < 3; i++) { - man = (10 * man) + ((i < h->num_digits) ? h->digits[i] : 0); + // To get from "in the range [0.1 .. 10]" to "in the range [1 .. 2]" (which + // will give us our exponent in base-2), the mantissa's first 3 digits will + // determine the final left shift, equal to 52 (the number of explicit f64 + // bits) plus an additional adjustment. + int man3 = (100 * h->digits[0]) + + ((h->num_digits > 1) ? (10 * h->digits[1]) : 0) + + ((h->num_digits > 2) ? h->digits[2] : 0); + int32_t additional_lshift = 0; + if (h->decimal_point == 0) { // The value is in [0.1 .. 1]. + if (man3 < 125) { + additional_lshift = +4; + } else if (man3 < 250) { + additional_lshift = +3; + } else if (man3 < 500) { + additional_lshift = +2; + } else { + additional_lshift = +1; + } + } else { // The value is in [1 .. 10]. + if (man3 < 200) { + additional_lshift = -0; + } else if (man3 < 400) { + additional_lshift = -1; + } else if (man3 < 800) { + additional_lshift = -2; + } else { + additional_lshift = -3; + } } - int32_t final_lshift = 52; - int32_t additional_shift = 0; - if (h->decimal_point == 0) { // value is in [0.1 .. 1] - if (man < 125) { - additional_shift = -4; - } else if (man < 250) { - additional_shift = -3; - } else if (man < 500) { - additional_shift = -2; - } else { - additional_shift = -1; - } - } else { // value is in [1 .. 10] - if (man < 200) { - additional_shift = 0; - } else if (man < 400) { - additional_shift = 1; - } else if (man < 800) { - additional_shift = 2; - } else { - additional_shift = 3; - } - } - exp2 += additional_shift; - final_lshift -= additional_shift; - + exp2 -= additional_lshift; + uint32_t final_lshift = (uint32_t)(52 + additional_lshift); + // The minimum normal exponent is (f64_bias + 1). while ((f64_bias + 1) > exp2) { uint32_t n = (uint32_t)((f64_bias + 1) - exp2); @@ -14909,7 +14931,8 @@ } // Extract 53 bits for the mantissa (in base-2). - wuffs_base__private_implementation__high_prec_dec__small_lshift(h, final_lshift); + wuffs_base__private_implementation__high_prec_dec__small_lshift( + h, final_lshift); uint64_t man2 = wuffs_base__private_implementation__high_prec_dec__rounded_integer(h);